//
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Code generated by "mkasm_amd64.py", DO NOT EDIT.

package x86_64

// ADCB performs "Add with Carry".
//
// Mnemonic : ADC
// Supported forms : (6 forms)
//
// * ADCB imm8, al
// * ADCB imm8, r8
// * ADCB r8, r8
// * ADCB m8, r8
// * ADCB imm8, m8
// * ADCB r8, m8
//
func (self *Program) ADCB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADCB", 2, Operands { v0, v1 })
    // ADCB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x14)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADCB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADCB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(2, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x10)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADCB")
    }
    return p
}
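
// Usage sketch (an illustrative comment, not part of the generated output):
// the instruction methods above are called on a Program with register
// constants and plain integer immediates, matching the operand checks
// (isImm8, v1 == AL). The constructor name and the CL/DL constants below are
// assumptions made for the example; only the ADCB call shapes come from the
// code above.
//
//     p := CreateProgram()   // hypothetical constructor
//     p.ADCB(1, AL)          // ADCB imm8, al -> single encoding, 0x14 0x01
//     p.ADCB(CL, DL)         // ADCB r8, r8   -> two candidate encodings (0x10 / 0x12)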

// ADCL performs "Add with Carry".
//
// Mnemonic : ADC
// Supported forms : (8 forms)
//
// * ADCL imm32, eax
// * ADCL imm8, r32
// * ADCL imm32, r32
// * ADCL r32, r32
// * ADCL m32, r32
// * ADCL imm8, m32
// * ADCL imm32, m32
// * ADCL r32, m32
//
func (self *Program) ADCL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADCL", 2, Operands { v0, v1 })
    // ADCL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x15)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADCL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xd0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADCL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADCL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADCL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(2, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(2, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADCL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADCL")
    }
    return p
}

// ADCQ performs "Add with Carry".
//
// Mnemonic : ADC
// Supported forms : (8 forms)
//
// * ADCQ imm32, rax
// * ADCQ imm8, r64
// * ADCQ imm32, r64
// * ADCQ r64, r64
// * ADCQ m64, r64
// * ADCQ imm8, m64
// * ADCQ imm32, m64
// * ADCQ r64, m64
//
func (self *Program) ADCQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADCQ", 2, Operands { v0, v1 })
    // ADCQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x15)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADCQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xd0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADCQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADCQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADCQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(2, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(2, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADCQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADCQ")
    }
    return p
}

// ADCW performs "Add with Carry".
//
// Mnemonic : ADC
// Supported forms : (8 forms)
//
// * ADCW imm16, ax
// * ADCW imm8, r16
// * ADCW imm16, r16
// * ADCW r16, r16
// * ADCW m16, r16
// * ADCW imm8, m16
// * ADCW imm16, m16
// * ADCW r16, m16
//
func (self *Program) ADCW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADCW", 2, Operands { v0, v1 })
    // ADCW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x15)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ADCW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xd0 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // ADCW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADCW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADCW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(2, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADCW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(2, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ADCW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADCW")
    }
    return p
}

// ADCXL performs "Unsigned Integer Addition of Two Operands with Carry Flag".
//
// Mnemonic : ADCX
// Supported forms : (2 forms)
//
// * ADCXL r32, r32 [ADX]
// * ADCXL m32, r32 [ADX]
//
func (self *Program) ADCXL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADCXL", 2, Operands { v0, v1 })
    // ADCXL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADCXL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADCXL")
    }
    return p
}

// ADCXQ performs "Unsigned Integer Addition of Two Operands with Carry Flag".
//
// Mnemonic : ADCX
// Supported forms : (2 forms)
//
// * ADCXQ r64, r64 [ADX]
// * ADCXQ m64, r64 [ADX]
//
func (self *Program) ADCXQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADCXQ", 2, Operands { v0, v1 })
    // ADCXQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADCXQ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADCXQ")
    }
    return p
}
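
// Note on ISA gating (a hedged observation, not generated text): unlike the
// plain ADC forms above, the ADCX/ADOX encoders call self.require(ISA_ADX)
// before registering an encoding, so assembling one of these records an ADX
// feature requirement on the whole Program rather than failing per
// instruction; the panic path still only fires on invalid operands. Reusing
// the hypothetical constructor from the earlier sketch (RCX is likewise an
// assumed register constant):
//
//     p := CreateProgram()
//     p.ADCXQ(RCX, RAX)      // ADCXQ r64, r64 -> 0x66 REX.W 0x0f 0x38 0xf6 /r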

// ADDB performs "Add".
//
// Mnemonic : ADD
// Supported forms : (6 forms)
//
// * ADDB imm8, al
// * ADDB imm8, r8
// * ADDB r8, r8
// * ADDB m8, r8
// * ADDB imm8, m8
// * ADDB r8, m8
//
func (self *Program) ADDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDB", 2, Operands { v0, v1 })
    // ADDB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x04)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x02)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADDB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(0, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x00)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDB")
    }
    return p
}

// ADDL performs "Add".
//
// Mnemonic : ADD
// Supported forms : (8 forms)
//
// * ADDL imm32, eax
// * ADDL imm8, r32
// * ADDL imm32, r32
// * ADDL r32, r32
// * ADDL m32, r32
// * ADDL imm8, m32
// * ADDL imm32, m32
// * ADDL r32, m32
//
func (self *Program) ADDL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDL", 2, Operands { v0, v1 })
    // ADDL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x05)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADDL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xc0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADDL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x03)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADDL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(0, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(0, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADDL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x01)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDL")
    }
    return p
}

// ADDPD performs "Add Packed Double-Precision Floating-Point Values".
//
// Mnemonic : ADDPD
// Supported forms : (2 forms)
//
// * ADDPD xmm, xmm [SSE2]
// * ADDPD m128, xmm [SSE2]
//
func (self *Program) ADDPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDPD", 2, Operands { v0, v1 })
    // ADDPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDPD")
    }
    return p
}

// ADDPS performs "Add Packed Single-Precision Floating-Point Values".
//
// Mnemonic : ADDPS
// Supported forms : (2 forms)
//
// * ADDPS xmm, xmm [SSE]
// * ADDPS m128, xmm [SSE]
//
func (self *Program) ADDPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDPS", 2, Operands { v0, v1 })
    // ADDPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDPS")
    }
    return p
}

// ADDQ performs "Add".
//
// Mnemonic : ADD
// Supported forms : (8 forms)
//
// * ADDQ imm32, rax
// * ADDQ imm8, r64
// * ADDQ imm32, r64
// * ADDQ r64, r64
// * ADDQ m64, r64
// * ADDQ imm8, m64
// * ADDQ imm32, m64
// * ADDQ r64, m64
//
func (self *Program) ADDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDQ", 2, Operands { v0, v1 })
    // ADDQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x05)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADDQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xc0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADDQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x03)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADDQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(0, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(0, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ADDQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x01)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDQ")
    }
    return p
}

// ADDSD performs "Add Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : ADDSD
// Supported forms : (2 forms)
//
// * ADDSD xmm, xmm [SSE2]
// * ADDSD m64, xmm [SSE2]
//
func (self *Program) ADDSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDSD", 2, Operands { v0, v1 })
    // ADDSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDSD")
    }
    return p
}

// ADDSS performs "Add Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : ADDSS
// Supported forms : (2 forms)
//
// * ADDSS xmm, xmm [SSE]
// * ADDSS m32, xmm [SSE]
//
func (self *Program) ADDSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDSS", 2, Operands { v0, v1 })
    // ADDSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDSS")
    }
    return p
}

// ADDSUBPD performs "Packed Double-FP Add/Subtract".
//
// Mnemonic : ADDSUBPD
// Supported forms : (2 forms)
//
// * ADDSUBPD xmm, xmm [SSE3]
// * ADDSUBPD m128, xmm [SSE3]
//
func (self *Program) ADDSUBPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDSUBPD", 2, Operands { v0, v1 })
    // ADDSUBPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd0)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDSUBPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDSUBPD")
    }
    return p
}

// ADDSUBPS performs "Packed Single-FP Add/Subtract".
//
// Mnemonic : ADDSUBPS
// Supported forms : (2 forms)
//
// * ADDSUBPS xmm, xmm [SSE3]
// * ADDSUBPS m128, xmm [SSE3]
//
func (self *Program) ADDSUBPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDSUBPS", 2, Operands { v0, v1 })
    // ADDSUBPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd0)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDSUBPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDSUBPS")
    }
    return p
}

// ADDW performs "Add".
//
// Mnemonic : ADD
// Supported forms : (8 forms)
//
// * ADDW imm16, ax
// * ADDW imm8, r16
// * ADDW imm16, r16
// * ADDW r16, r16
// * ADDW m16, r16
// * ADDW imm8, m16
// * ADDW imm16, m16
// * ADDW r16, m16
//
func (self *Program) ADDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADDW", 2, Operands { v0, v1 })
    // ADDW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x05)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ADDW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xc0 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // ADDW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADDW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x03)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ADDW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(0, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ADDW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(0, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ADDW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x01)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADDW")
    }
    return p
}

// ADOXL performs "Unsigned Integer Addition of Two Operands with Overflow Flag".
//
// Mnemonic : ADOX
// Supported forms : (2 forms)
//
// * ADOXL r32, r32 [ADX]
// * ADOXL m32, r32 [ADX]
//
func (self *Program) ADOXL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADOXL", 2, Operands { v0, v1 })
    // ADOXL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADOXL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADOXL")
    }
    return p
}

// ADOXQ performs "Unsigned Integer Addition of Two Operands with Overflow Flag".
//
// Mnemonic : ADOX
// Supported forms : (2 forms)
//
// * ADOXQ r64, r64 [ADX]
// * ADOXQ m64, r64 [ADX]
//
func (self *Program) ADOXQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ADOXQ", 2, Operands { v0, v1 })
    // ADOXQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ADOXQ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_ADX)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ADOXQ")
    }
    return p
}

// AESDEC performs "Perform One Round of an AES Decryption Flow".
//
// Mnemonic : AESDEC
// Supported forms : (2 forms)
//
// * AESDEC xmm, xmm [AES]
// * AESDEC m128, xmm [AES]
//
func (self *Program) AESDEC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("AESDEC", 2, Operands { v0, v1 })
    // AESDEC xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // AESDEC m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xde)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for AESDEC")
    }
    return p
}

// AESDECLAST performs "Perform Last Round of an AES Decryption Flow".
//
// Mnemonic : AESDECLAST
// Supported forms : (2 forms)
//
// * AESDECLAST xmm, xmm [AES]
// * AESDECLAST m128, xmm [AES]
//
func (self *Program) AESDECLAST(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("AESDECLAST", 2, Operands { v0, v1 })
    // AESDECLAST xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // AESDECLAST m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for AESDECLAST")
    }
    return p
}

// AESENC performs "Perform One Round of an AES Encryption Flow".
//
// Mnemonic : AESENC
// Supported forms : (2 forms)
//
// * AESENC xmm, xmm [AES]
// * AESENC m128, xmm [AES]
//
func (self *Program) AESENC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("AESENC", 2, Operands { v0, v1 })
    // AESENC xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // AESENC m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for AESENC")
    }
    return p
}

// AESENCLAST performs "Perform Last Round of an AES Encryption Flow".
//
// Mnemonic : AESENCLAST
// Supported forms : (2 forms)
//
// * AESENCLAST xmm, xmm [AES]
// * AESENCLAST m128, xmm [AES]
//
func (self *Program) AESENCLAST(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("AESENCLAST", 2, Operands { v0, v1 })
    // AESENCLAST xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // AESENCLAST m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for AESENCLAST")
    }
    return p
}

// AESIMC performs "Perform the AES InvMixColumn Transformation".
//
// Mnemonic : AESIMC
// Supported forms : (2 forms)
//
// * AESIMC xmm, xmm [AES]
// * AESIMC m128, xmm [AES]
//
func (self *Program) AESIMC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("AESIMC", 2, Operands { v0, v1 })
    // AESIMC xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // AESIMC m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xdb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for AESIMC")
    }
    return p
}

// AESKEYGENASSIST performs "AES Round Key Generation Assist".
//
// Mnemonic : AESKEYGENASSIST
// Supported forms : (2 forms)
//
// * AESKEYGENASSIST imm8, xmm, xmm [AES]
// * AESKEYGENASSIST imm8, m128, xmm [AES]
//
func (self *Program) AESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("AESKEYGENASSIST", 3, Operands { v0, v1, v2 })
    // AESKEYGENASSIST imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // AESKEYGENASSIST imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for AESKEYGENASSIST")
    }
    return p
}
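
// Three-operand sketch (illustrative, not generated): AESKEYGENASSIST takes
// (imm8, src, dst) in the operand order documented above, with the immediate
// becoming the trailing byte of the encoding (m.imm1 runs after the ModRM or
// memory operand is written). The XMM register names are assumptions made for
// the example.
//
//     p.AESKEYGENASSIST(0x01, XMM1, XMM2)   // AESKEYGENASSIST imm8, xmm, xmm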

// ANDB performs "Logical AND".
//
// Mnemonic : AND
// Supported forms : (6 forms)
//
// * ANDB imm8, al
// * ANDB imm8, r8
// * ANDB r8, r8
// * ANDB m8, r8
// * ANDB imm8, m8
// * ANDB r8, m8
//
func (self *Program) ANDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDB", 2, Operands { v0, v1 })
    // ANDB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x24)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ANDB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x20)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDB")
    }
    return p
}

// ANDL performs "Logical AND".
//
// Mnemonic : AND
// Supported forms : (8 forms)
//
// * ANDL imm32, eax
// * ANDL imm8, r32
// * ANDL imm32, r32
// * ANDL r32, r32
// * ANDL m32, r32
// * ANDL imm8, m32
// * ANDL imm32, m32
// * ANDL r32, m32
//
func (self *Program) ANDL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDL", 2, Operands { v0, v1 })
    // ANDL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x25)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ANDL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xe0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ANDL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ANDL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(4, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ANDL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x21)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDL")
    }
    return p
}

// ANDNL performs "Logical AND NOT".
//
// Mnemonic : ANDN
// Supported forms : (2 forms)
//
// * ANDNL r32, r32, r32 [BMI]
// * ANDNL m32, r32, r32 [BMI]
//
func (self *Program) ANDNL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("ANDNL", 3, Operands { v0, v1, v2 })
    // ANDNL r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // ANDNL m32, r32, r32
    if isM32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDNL")
    }
    return p
}

// ANDNPD performs "Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : ANDNPD
// Supported forms : (2 forms)
//
// * ANDNPD xmm, xmm [SSE2]
// * ANDNPD m128, xmm [SSE2]
//
func (self *Program) ANDNPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDNPD", 2, Operands { v0, v1 })
    // ANDNPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDNPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x55)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDNPD")
    }
    return p
}

// ANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : ANDNPS
// Supported forms : (2 forms)
//
// * ANDNPS xmm, xmm [SSE]
// * ANDNPS m128, xmm [SSE]
//
func (self *Program) ANDNPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDNPS", 2, Operands { v0, v1 })
    // ANDNPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDNPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x55)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDNPS")
    }
    return p
}

// ANDNQ performs "Logical AND NOT".
//
// Mnemonic : ANDN
// Supported forms : (2 forms)
//
// * ANDNQ r64, r64, r64 [BMI]
// * ANDNQ m64, r64, r64 [BMI]
//
func (self *Program) ANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("ANDNQ", 3, Operands { v0, v1, v2 })
    // ANDNQ r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // ANDNQ m64, r64, r64
    if isM64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDNQ")
    }
    return p
}

// ANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : ANDPD
// Supported forms : (2 forms)
//
// * ANDPD xmm, xmm [SSE2]
// * ANDPD m128, xmm [SSE2]
//
func (self *Program) ANDPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDPD", 2, Operands { v0, v1 })
    // ANDPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x54)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDPD")
    }
    return p
}

// ANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : ANDPS
// Supported forms : (2 forms)
//
// * ANDPS xmm, xmm [SSE]
// * ANDPS m128, xmm [SSE]
//
func (self *Program) ANDPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDPS", 2, Operands { v0, v1 })
    // ANDPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x54)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDPS")
    }
    return p
}

// ANDQ performs "Logical AND".
//
// Mnemonic : AND
// Supported forms : (8 forms)
//
// * ANDQ imm32, rax
// * ANDQ imm8, r64
// * ANDQ imm32, r64
// * ANDQ r64, r64
// * ANDQ m64, r64
// * ANDQ imm8, m64
// * ANDQ imm32, m64
// * ANDQ r64, m64
//
func (self *Program) ANDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDQ", 2, Operands { v0, v1 })
    // ANDQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x25)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ANDQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xe0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ANDQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ANDQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(4, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ANDQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x21)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDQ")
    }
    return p
}

// ANDW performs "Logical AND".
//
// Mnemonic : AND
// Supported forms : (8 forms)
//
// * ANDW imm16, ax
// * ANDW imm8, r16
// * ANDW imm16, r16
// * ANDW r16, r16
// * ANDW m16, r16
// * ANDW imm8, m16
// * ANDW imm16, m16
// * ANDW r16, m16
//
func (self *Program) ANDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ANDW", 2, Operands { v0, v1 })
    // ANDW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x25)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ANDW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xe0 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // ANDW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ANDW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ANDW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ANDW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(4, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ANDW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x21)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ANDW")
    }
    return p
}

// BEXTR performs "Bit Field Extract".
//
// Mnemonic : BEXTR
// Supported forms : (8 forms)
//
// * BEXTR imm32, r32, r32 [TBM]
// * BEXTR imm32, m32, r32 [TBM]
// * BEXTR imm32, r64, r64 [TBM]
// * BEXTR imm32, m64, r64 [TBM]
// * BEXTR r32, r32, r32 [BMI]
// * BEXTR r32, m32, r32 [BMI]
// * BEXTR r64, r64, r64 [BMI]
// * BEXTR r64, m64, r64 [BMI]
//
func (self *Program) BEXTR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("BEXTR", 3, Operands { v0, v1, v2 })
    // BEXTR imm32, r32, r32
    if isImm32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // BEXTR imm32, m32, r32
    if isImm32(v0) && isM32(v1) && isReg32(v2) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1010, 0x00, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // BEXTR imm32, r64, r64
    if isImm32(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf8)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // BEXTR imm32, m64, r64
    if isImm32(v0) && isM64(v1) && isReg64(v2) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1010, 0x80, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // BEXTR r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // BEXTR r32, m32, r32
    if isReg32(v0) && isM32(v1) && isReg32(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // BEXTR r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf8 ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // BEXTR r64, m64, r64
    if isReg64(v0) && isM64(v1) && isReg64(v2) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BEXTR")
    }
    return p
}
|
|
|
|
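// Editorial usage sketch (not generator output): the calls below assume a
// *Program value p created elsewhere with this package's usual constructors;
// only the BEXTR method and register constants are taken from this file.
//
//     p.BEXTR(ECX, EDX, EAX)    // BMI form: control word in ECX, EAX = field of EDX
//     p.BEXTR(0x0810, EDX, EAX) // TBM form: imm32 control, start=0x10, length=0x08
//
// Each call selects the first matching form above and records its ISA
// requirement (ISA_BMI or ISA_TBM) via self.require.
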
// BLCFILL performs "Fill From Lowest Clear Bit".
//
// Mnemonic : BLCFILL
// Supported forms : (4 forms)
//
// * BLCFILL r32, r32 [TBM]
// * BLCFILL m32, r32 [TBM]
// * BLCFILL r64, r64 [TBM]
// * BLCFILL m64, r64 [TBM]
//
func (self *Program) BLCFILL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLCFILL", 2, Operands { v0, v1 })
    // BLCFILL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // BLCFILL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    // BLCFILL r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // BLCFILL m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLCFILL")
    }
    return p
}

// BLCI performs "Isolate Lowest Clear Bit".
//
// Mnemonic : BLCI
// Supported forms : (4 forms)
//
// * BLCI r32, r32 [TBM]
// * BLCI m32, r32 [TBM]
// * BLCI r64, r64 [TBM]
// * BLCI m64, r64 [TBM]
//
func (self *Program) BLCI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLCI", 2, Operands { v0, v1 })
    // BLCI r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x02)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // BLCI m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x02)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    // BLCI r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x02)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // BLCI m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x02)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLCI")
    }
    return p
}

// BLCIC performs "Isolate Lowest Clear Bit and Complement".
//
// Mnemonic : BLCIC
// Supported forms : (4 forms)
//
// * BLCIC r32, r32 [TBM]
// * BLCIC m32, r32 [TBM]
// * BLCIC r64, r64 [TBM]
// * BLCIC m64, r64 [TBM]
//
func (self *Program) BLCIC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLCIC", 2, Operands { v0, v1 })
    // BLCIC r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xe8 | lcode(v[0]))
        })
    }
    // BLCIC m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(5, addr(v[0]), 1)
        })
    }
    // BLCIC r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xe8 | lcode(v[0]))
        })
    }
    // BLCIC m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(5, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLCIC")
    }
    return p
}

// BLCMSK performs "Mask From Lowest Clear Bit".
//
// Mnemonic : BLCMSK
// Supported forms : (4 forms)
//
// * BLCMSK r32, r32 [TBM]
// * BLCMSK m32, r32 [TBM]
// * BLCMSK r64, r64 [TBM]
// * BLCMSK m64, r64 [TBM]
//
func (self *Program) BLCMSK(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLCMSK", 2, Operands { v0, v1 })
    // BLCMSK r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x02)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // BLCMSK m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x02)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    // BLCMSK r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x02)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // BLCMSK m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x02)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLCMSK")
    }
    return p
}

// BLCS performs "Set Lowest Clear Bit".
//
// Mnemonic : BLCS
// Supported forms : (4 forms)
//
// * BLCS r32, r32 [TBM]
// * BLCS m32, r32 [TBM]
// * BLCS r64, r64 [TBM]
// * BLCS m64, r64 [TBM]
//
func (self *Program) BLCS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLCS", 2, Operands { v0, v1 })
    // BLCS r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // BLCS m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    // BLCS r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // BLCS m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLCS")
    }
    return p
}

// BLENDPD performs "Blend Packed Double Precision Floating-Point Values".
//
// Mnemonic : BLENDPD
// Supported forms : (2 forms)
//
// * BLENDPD imm8, xmm, xmm [SSE4.1]
// * BLENDPD imm8, m128, xmm [SSE4.1]
//
func (self *Program) BLENDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("BLENDPD", 3, Operands { v0, v1, v2 })
    // BLENDPD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BLENDPD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0d)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLENDPD")
    }
    return p
}

// BLENDPS performs "Blend Packed Single Precision Floating-Point Values".
//
// Mnemonic : BLENDPS
// Supported forms : (2 forms)
//
// * BLENDPS imm8, xmm, xmm [SSE4.1]
// * BLENDPS imm8, m128, xmm [SSE4.1]
//
func (self *Program) BLENDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("BLENDPS", 3, Operands { v0, v1, v2 })
    // BLENDPS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BLENDPS imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0c)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLENDPS")
    }
    return p
}

// BLENDVPD performs "Variable Blend Packed Double Precision Floating-Point Values".
//
// Mnemonic : BLENDVPD
// Supported forms : (2 forms)
//
// * BLENDVPD xmm0, xmm, xmm [SSE4.1]
// * BLENDVPD xmm0, m128, xmm [SSE4.1]
//
func (self *Program) BLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("BLENDVPD", 3, Operands { v0, v1, v2 })
    // BLENDVPD xmm0, xmm, xmm
    if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // BLENDVPD xmm0, m128, xmm
    if v0 == XMM0 && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLENDVPD")
    }
    return p
}

// BLENDVPS performs "Variable Blend Packed Single Precision Floating-Point Values".
//
// Mnemonic : BLENDVPS
// Supported forms : (2 forms)
//
// * BLENDVPS xmm0, xmm, xmm [SSE4.1]
// * BLENDVPS xmm0, m128, xmm [SSE4.1]
//
func (self *Program) BLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("BLENDVPS", 3, Operands { v0, v1, v2 })
    // BLENDVPS xmm0, xmm, xmm
    if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // BLENDVPS xmm0, m128, xmm
    if v0 == XMM0 && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLENDVPS")
    }
    return p
}

// BLSFILL performs "Fill From Lowest Set Bit".
//
// Mnemonic : BLSFILL
// Supported forms : (4 forms)
//
// * BLSFILL r32, r32 [TBM]
// * BLSFILL m32, r32 [TBM]
// * BLSFILL r64, r64 [TBM]
// * BLSFILL m64, r64 [TBM]
//
func (self *Program) BLSFILL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLSFILL", 2, Operands { v0, v1 })
    // BLSFILL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // BLSFILL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    // BLSFILL r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // BLSFILL m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLSFILL")
    }
    return p
}

// BLSI performs "Isolate Lowest Set Bit".
//
// Mnemonic : BLSI
// Supported forms : (4 forms)
//
// * BLSI r32, r32 [BMI]
// * BLSI m32, r32 [BMI]
// * BLSI r64, r64 [BMI]
// * BLSI m64, r64 [BMI]
//
func (self *Program) BLSI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLSI", 2, Operands { v0, v1 })
    // BLSI r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0xf3)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // BLSI m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    // BLSI r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0xf3)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // BLSI m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLSI")
    }
    return p
}

// BLSIC performs "Isolate Lowest Set Bit and Complement".
//
// Mnemonic : BLSIC
// Supported forms : (4 forms)
//
// * BLSIC r32, r32 [TBM]
// * BLSIC m32, r32 [TBM]
// * BLSIC r64, r64 [TBM]
// * BLSIC m64, r64 [TBM]
//
func (self *Program) BLSIC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLSIC", 2, Operands { v0, v1 })
    // BLSIC r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // BLSIC m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    // BLSIC r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // BLSIC m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLSIC")
    }
    return p
}

// BLSMSK performs "Mask From Lowest Set Bit".
//
// Mnemonic : BLSMSK
// Supported forms : (4 forms)
//
// * BLSMSK r32, r32 [BMI]
// * BLSMSK m32, r32 [BMI]
// * BLSMSK r64, r64 [BMI]
// * BLSMSK m64, r64 [BMI]
//
func (self *Program) BLSMSK(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLSMSK", 2, Operands { v0, v1 })
    // BLSMSK r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0xf3)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // BLSMSK m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    // BLSMSK r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0xf3)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // BLSMSK m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLSMSK")
    }
    return p
}

// BLSR performs "Reset Lowest Set Bit".
//
// Mnemonic : BLSR
// Supported forms : (4 forms)
//
// * BLSR r32, r32 [BMI]
// * BLSR m32, r32 [BMI]
// * BLSR r64, r64 [BMI]
// * BLSR m64, r64 [BMI]
//
func (self *Program) BLSR(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BLSR", 2, Operands { v0, v1 })
    // BLSR r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0xf3)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // BLSR m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    // BLSR r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0xf3)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // BLSR m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BLSR")
    }
    return p
}

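// Editorial note on the BLC*/BLS* helpers above (assuming an existing
// *Program p; setup code is not defined in this file): the TBM forms encode
// through the XOP prefix (0x8f, map 0b1001) with the ModRM reg field
// selecting the operation, while the BMI forms use the VEX prefix (0xc4)
// with opcode 0xf3. For example:
//
//     p.BLSI(EDX, EAX) // EAX = EDX & -EDX      (VEX 0xf3 /3, BMI)
//     p.BLSR(EDX, EAX) // EAX = EDX & (EDX - 1) (VEX 0xf3 /1, BMI)
//     p.BLCI(EDX, EAX) // isolate lowest clear bit (XOP 0x02 /6, TBM)
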
// BSFL performs "Bit Scan Forward".
//
// Mnemonic : BSF
// Supported forms : (2 forms)
//
// * BSFL r32, r32
// * BSFL m32, r32
//
func (self *Program) BSFL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BSFL", 2, Operands { v0, v1 })
    // BSFL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // BSFL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSFL")
    }
    return p
}

// BSFQ performs "Bit Scan Forward".
//
// Mnemonic : BSF
// Supported forms : (2 forms)
//
// * BSFQ r64, r64
// * BSFQ m64, r64
//
func (self *Program) BSFQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BSFQ", 2, Operands { v0, v1 })
    // BSFQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // BSFQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xbc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSFQ")
    }
    return p
}

// BSFW performs "Bit Scan Forward".
//
// Mnemonic : BSF
// Supported forms : (2 forms)
//
// * BSFW r16, r16
// * BSFW m16, r16
//
func (self *Program) BSFW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BSFW", 2, Operands { v0, v1 })
    // BSFW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // BSFW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSFW")
    }
    return p
}

// BSRL performs "Bit Scan Reverse".
//
// Mnemonic : BSR
// Supported forms : (2 forms)
//
// * BSRL r32, r32
// * BSRL m32, r32
//
func (self *Program) BSRL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BSRL", 2, Operands { v0, v1 })
    // BSRL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // BSRL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSRL")
    }
    return p
}

// BSRQ performs "Bit Scan Reverse".
//
// Mnemonic : BSR
// Supported forms : (2 forms)
//
// * BSRQ r64, r64
// * BSRQ m64, r64
//
func (self *Program) BSRQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BSRQ", 2, Operands { v0, v1 })
    // BSRQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // BSRQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xbd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSRQ")
    }
    return p
}

// BSRW performs "Bit Scan Reverse".
//
// Mnemonic : BSR
// Supported forms : (2 forms)
//
// * BSRW r16, r16
// * BSRW m16, r16
//
func (self *Program) BSRW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BSRW", 2, Operands { v0, v1 })
    // BSRW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // BSRW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSRW")
    }
    return p
}

// BSWAPL performs "Byte Swap".
//
// Mnemonic : BSWAP
// Supported forms : (1 form)
//
// * BSWAPL r32
//
func (self *Program) BSWAPL(v0 interface{}) *Instruction {
    p := self.alloc("BSWAPL", 1, Operands { v0 })
    // BSWAPL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0x0f)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSWAPL")
    }
    return p
}

// BSWAPQ performs "Byte Swap".
//
// Mnemonic : BSWAP
// Supported forms : (1 form)
//
// * BSWAPQ r64
//
func (self *Program) BSWAPQ(v0 interface{}) *Instruction {
    p := self.alloc("BSWAPQ", 1, Operands { v0 })
    // BSWAPQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for BSWAPQ")
    }
    return p
}

// BTCL performs "Bit Test and Complement".
//
// Mnemonic : BTC
// Supported forms : (4 forms)
//
// * BTCL imm8, r32
// * BTCL r32, r32
// * BTCL imm8, m32
// * BTCL r32, m32
//
func (self *Program) BTCL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTCL", 2, Operands { v0, v1 })
    // BTCL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTCL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTCL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTCL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xbb)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTCL")
    }
    return p
}

// BTCQ performs "Bit Test and Complement".
//
// Mnemonic : BTC
// Supported forms : (4 forms)
//
// * BTCQ imm8, r64
// * BTCQ r64, r64
// * BTCQ imm8, m64
// * BTCQ r64, m64
//
func (self *Program) BTCQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTCQ", 2, Operands { v0, v1 })
    // BTCQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTCQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTCQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTCQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xbb)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTCQ")
    }
    return p
}

// BTCW performs "Bit Test and Complement".
//
// Mnemonic : BTC
// Supported forms : (4 forms)
//
// * BTCW imm8, r16
// * BTCW r16, r16
// * BTCW imm8, m16
// * BTCW r16, m16
//
func (self *Program) BTCW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTCW", 2, Operands { v0, v1 })
    // BTCW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTCW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTCW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTCW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xbb)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTCW")
    }
    return p
}

// BTL performs "Bit Test".
//
// Mnemonic : BT
// Supported forms : (4 forms)
//
// * BTL imm8, r32
// * BTL r32, r32
// * BTL imm8, m32
// * BTL r32, m32
//
func (self *Program) BTL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTL", 2, Operands { v0, v1 })
    // BTL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xa3)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTL")
    }
    return p
}

// BTQ performs "Bit Test".
//
// Mnemonic : BT
// Supported forms : (4 forms)
//
// * BTQ imm8, r64
// * BTQ r64, r64
// * BTQ imm8, m64
// * BTQ r64, m64
//
func (self *Program) BTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTQ", 2, Operands { v0, v1 })
    // BTQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xa3)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTQ")
    }
    return p
}

// BTRL performs "Bit Test and Reset".
//
// Mnemonic : BTR
// Supported forms : (4 forms)
//
// * BTRL imm8, r32
// * BTRL r32, r32
// * BTRL imm8, m32
// * BTRL r32, m32
//
func (self *Program) BTRL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTRL", 2, Operands { v0, v1 })
    // BTRL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTRL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xb3)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTRL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTRL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xb3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTRL")
    }
    return p
}

// BTRQ performs "Bit Test and Reset".
//
// Mnemonic : BTR
// Supported forms : (4 forms)
//
// * BTRQ imm8, r64
// * BTRQ r64, r64
// * BTRQ imm8, m64
// * BTRQ r64, m64
//
func (self *Program) BTRQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTRQ", 2, Operands { v0, v1 })
    // BTRQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTRQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xb3)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTRQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTRQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xb3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTRQ")
    }
    return p
}

// BTRW performs "Bit Test and Reset".
//
// Mnemonic : BTR
// Supported forms : (4 forms)
//
// * BTRW imm8, r16
// * BTRW r16, r16
// * BTRW imm8, m16
// * BTRW r16, m16
//
func (self *Program) BTRW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTRW", 2, Operands { v0, v1 })
    // BTRW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTRW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xb3)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTRW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTRW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xb3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTRW")
    }
    return p
}

// BTSL performs "Bit Test and Set".
//
// Mnemonic : BTS
// Supported forms : (4 forms)
//
// * BTSL imm8, r32
// * BTSL r32, r32
// * BTSL imm8, m32
// * BTSL r32, m32
//
func (self *Program) BTSL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTSL", 2, Operands { v0, v1 })
    // BTSL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTSL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTSL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTSL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xab)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTSL")
    }
    return p
}

// BTSQ performs "Bit Test and Set".
//
// Mnemonic : BTS
// Supported forms : (4 forms)
//
// * BTSQ imm8, r64
// * BTSQ r64, r64
// * BTSQ imm8, m64
// * BTSQ r64, m64
//
func (self *Program) BTSQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTSQ", 2, Operands { v0, v1 })
    // BTSQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTSQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTSQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTSQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xab)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTSQ")
    }
    return p
}

// BTSW performs "Bit Test and Set".
//
// Mnemonic : BTS
// Supported forms : (4 forms)
//
// * BTSW imm8, r16
// * BTSW r16, r16
// * BTSW imm8, m16
// * BTSW r16, m16
//
func (self *Program) BTSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTSW", 2, Operands { v0, v1 })
    // BTSW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTSW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTSW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTSW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xab)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTSW")
    }
    return p
}

// BTW performs "Bit Test".
//
// Mnemonic : BT
// Supported forms : (4 forms)
//
// * BTW imm8, r16
// * BTW r16, r16
// * BTW imm8, m16
// * BTW r16, m16
//
func (self *Program) BTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("BTW", 2, Operands { v0, v1 })
    // BTW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0xba)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xa3)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // BTW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xba)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // BTW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BTW")
    }
    return p
}

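// Editorial note: the BT/BTC/BTR/BTS families above share one encoding shape.
// The imm8 forms are 0x0f 0xba with the ModRM reg field selecting the
// operation (4=BT, 5=BTS, 6=BTR, 7=BTC), and the register-index forms use
// 0x0f 0xa3/0xab/0xb3/0xbb respectively. A sketch, assuming an existing
// *Program p:
//
//     p.BTSL(3, EAX)   // set bit 3 of EAX          (0x0f 0xba /5 ib)
//     p.BTRL(3, EAX)   // reset bit 3 of EAX        (0x0f 0xba /6 ib)
//     p.BTCL(ECX, EAX) // complement bit ECX of EAX (0x0f 0xbb /r)
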
// BZHI performs "Zero High Bits Starting with Specified Bit Position".
//
// Mnemonic : BZHI
// Supported forms : (4 forms)
//
// * BZHI r32, r32, r32 [BMI2]
// * BZHI r32, m32, r32 [BMI2]
// * BZHI r64, r64, r64 [BMI2]
// * BZHI r64, m64, r64 [BMI2]
//
func (self *Program) BZHI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("BZHI", 3, Operands { v0, v1, v2 })
    // BZHI r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // BZHI r32, m32, r32
    if isReg32(v0) && isM32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // BZHI r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf8 ^ (hlcode(v[0]) << 3))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // BZHI r64, m64, r64
    if isReg64(v0) && isM64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for BZHI")
    }
    return p
}

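// Editorial usage sketch for BZHI (assuming an existing *Program p; setup
// code is not defined in this file):
//
//     p.BZHI(RCX, RDX, RAX) // RAX = RDX with bit positions >= CL cleared; requires BMI2
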
// CALL performs "Call Procedure".
//
// Mnemonic : CALL
// Supported forms : (1 form)
//
// * CALL rel32
//
func (self *Program) CALL(v0 interface{}) *Instruction {
    p := self.alloc("CALL", 1, Operands { v0 })
    // CALL rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xe8)
            m.imm4(relv(v[0]))
        })
    }
    // CALL label
    if isLabel(v0) {
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0xe8)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for CALL")
    }
    return p
}

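// Editorial note: besides the rel32 form counted above, CALL also accepts a
// label operand (see the isLabel branch). A hedged sketch, assuming this
// package exposes a label constructor (the helper name below is an
// assumption, as is the pre-existing *Program p):
//
//     to := CreateLabel("target") // helper name assumed
//     p.CALL(to)                  // fixed up to rel32 once "target" is placed
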
// CALLQ performs "Call Procedure".
//
// Mnemonic : CALL
// Supported forms : (2 forms)
//
// * CALLQ r64
// * CALLQ m64
//
func (self *Program) CALLQ(v0 interface{}) *Instruction {
    p := self.alloc("CALLQ", 1, Operands { v0 })
    // CALLQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xff)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // CALLQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xff)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CALLQ")
    }
    return p
}

// CBTW performs "Convert Byte to Word".
//
// Mnemonic : CBW
// Supported forms : (1 form)
//
// * CBTW
//
func (self *Program) CBTW() *Instruction {
    p := self.alloc("CBTW", 0, Operands { })
    // CBTW
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x66)
        m.emit(0x98)
    })
    return p
}

// CLC performs "Clear Carry Flag".
//
// Mnemonic : CLC
// Supported forms : (1 form)
//
// * CLC
//
func (self *Program) CLC() *Instruction {
    p := self.alloc("CLC", 0, Operands { })
    // CLC
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xf8)
    })
    return p
}

// CLD performs "Clear Direction Flag".
//
// Mnemonic : CLD
// Supported forms : (1 form)
//
// * CLD
//
func (self *Program) CLD() *Instruction {
    p := self.alloc("CLD", 0, Operands { })
    // CLD
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xfc)
    })
    return p
}

// CLFLUSH performs "Flush Cache Line".
//
// Mnemonic : CLFLUSH
// Supported forms : (1 form)
//
// * CLFLUSH m8 [CLFLUSH]
//
func (self *Program) CLFLUSH(v0 interface{}) *Instruction {
    p := self.alloc("CLFLUSH", 1, Operands { v0 })
    // CLFLUSH m8
    if isM8(v0) {
        self.require(ISA_CLFLUSH)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xae)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CLFLUSH")
    }
    return p
}

// CLFLUSHOPT performs "Flush Cache Line Optimized".
//
// Mnemonic : CLFLUSHOPT
// Supported forms : (1 form)
//
// * CLFLUSHOPT m8 [CLFLUSHOPT]
//
func (self *Program) CLFLUSHOPT(v0 interface{}) *Instruction {
    p := self.alloc("CLFLUSHOPT", 1, Operands { v0 })
    // CLFLUSHOPT m8
    if isM8(v0) {
        self.require(ISA_CLFLUSHOPT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xae)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CLFLUSHOPT")
    }
    return p
}

// CLTD performs "Convert Doubleword to Quadword".
//
// Mnemonic : CDQ
// Supported forms : (1 form)
//
// * CLTD
//
func (self *Program) CLTD() *Instruction {
    p := self.alloc("CLTD", 0, Operands { })
    // CLTD
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x99)
    })
    return p
}

// CLTQ performs "Convert Doubleword to Quadword".
//
// Mnemonic : CDQE
// Supported forms : (1 form)
//
// * CLTQ
//
func (self *Program) CLTQ() *Instruction {
    p := self.alloc("CLTQ", 0, Operands { })
    // CLTQ
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x48)
        m.emit(0x98)
    })
    return p
}

// CLWB performs "Cache Line Write Back".
//
// Mnemonic : CLWB
// Supported forms : (1 form)
//
// * CLWB m8 [CLWB]
//
func (self *Program) CLWB(v0 interface{}) *Instruction {
    p := self.alloc("CLWB", 1, Operands { v0 })
    // CLWB m8
    if isM8(v0) {
        self.require(ISA_CLWB)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xae)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CLWB")
    }
    return p
}


// CLZERO performs "Zero-out 64-bit Cache Line".
//
// Mnemonic : CLZERO
// Supported forms : (1 form)
//
// * CLZERO [CLZERO]
//
func (self *Program) CLZERO() *Instruction {
    p := self.alloc("CLZERO", 0, Operands { })
    // CLZERO
    self.require(ISA_CLZERO)
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x01)
        m.emit(0xfc)
    })
    return p
}
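
// About the self.require(ISA_*) calls (a sketch under assumptions): each
// gated encoder records the CPU feature it depends on, so a caller can
// refuse to emit or run the code on hosts lacking that feature. The
// feature-test helper below is hypothetical; only the ISA_CLZERO constant
// comes from this package.
//
//     p.CLZERO()    // records that ISA_CLZERO is required by this program
//     // if !hostSupports(ISA_CLZERO) { reject the assembled code }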

// CMC performs "Complement Carry Flag".
//
// Mnemonic : CMC
// Supported forms : (1 form)
//
// * CMC
//
func (self *Program) CMC() *Instruction {
    p := self.alloc("CMC", 0, Operands { })
    // CMC
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xf5)
    })
    return p
}

// CMOVA performs "Move if above (CF == 0 and ZF == 0)".
//
// Mnemonic : CMOVA
// Supported forms : (6 forms)
//
// * CMOVA r16, r16 [CMOV]
// * CMOVA m16, r16 [CMOV]
// * CMOVA r32, r32 [CMOV]
// * CMOVA m32, r32 [CMOV]
// * CMOVA r64, r64 [CMOV]
// * CMOVA m64, r64 [CMOV]
//
func (self *Program) CMOVA(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVA", 2, Operands { v0, v1 })
    // CMOVA r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVA m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x47)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVA r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVA m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x47)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVA r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVA m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x47)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVA")
    }
    return p
}
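
// Worked example (a sketch; CMPQ and the register constants are assumed from
// the rest of this generated file): the AT&T operand order is source-first,
// so CMOVA src, dst copies src into dst when the previous compare left
// "above" flags. The pair below computes an unsigned minimum in RAX without
// a branch:
//
//     p.CMPQ(RCX, RAX)    // set flags from RAX - RCX
//     p.CMOVA(RCX, RAX)   // if RAX > RCX (unsigned), RAX = RCX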

// CMOVAE performs "Move if above or equal (CF == 0)".
//
// Mnemonic : CMOVAE
// Supported forms : (6 forms)
//
// * CMOVAE r16, r16 [CMOV]
// * CMOVAE m16, r16 [CMOV]
// * CMOVAE r32, r32 [CMOV]
// * CMOVAE m32, r32 [CMOV]
// * CMOVAE r64, r64 [CMOV]
// * CMOVAE m64, r64 [CMOV]
//
func (self *Program) CMOVAE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVAE", 2, Operands { v0, v1 })
    // CMOVAE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVAE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVAE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVAE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVAE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVAE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVAE")
    }
    return p
}

// CMOVB performs "Move if below (CF == 1)".
//
// Mnemonic : CMOVB
// Supported forms : (6 forms)
//
// * CMOVB r16, r16 [CMOV]
// * CMOVB m16, r16 [CMOV]
// * CMOVB r32, r32 [CMOV]
// * CMOVB m32, r32 [CMOV]
// * CMOVB r64, r64 [CMOV]
// * CMOVB m64, r64 [CMOV]
//
func (self *Program) CMOVB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVB", 2, Operands { v0, v1 })
    // CMOVB r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVB m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVB r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVB m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVB r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVB m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVB")
    }
    return p
}

// CMOVBE performs "Move if below or equal (CF == 1 or ZF == 1)".
//
// Mnemonic : CMOVBE
// Supported forms : (6 forms)
//
// * CMOVBE r16, r16 [CMOV]
// * CMOVBE m16, r16 [CMOV]
// * CMOVBE r32, r32 [CMOV]
// * CMOVBE m32, r32 [CMOV]
// * CMOVBE r64, r64 [CMOV]
// * CMOVBE m64, r64 [CMOV]
//
func (self *Program) CMOVBE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVBE", 2, Operands { v0, v1 })
    // CMOVBE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVBE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x46)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVBE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVBE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x46)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVBE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVBE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x46)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVBE")
    }
    return p
}

// CMOVC performs "Move if carry (CF == 1)".
//
// Mnemonic : CMOVC
// Supported forms : (6 forms)
//
// * CMOVC r16, r16 [CMOV]
// * CMOVC m16, r16 [CMOV]
// * CMOVC r32, r32 [CMOV]
// * CMOVC m32, r32 [CMOV]
// * CMOVC r64, r64 [CMOV]
// * CMOVC m64, r64 [CMOV]
//
func (self *Program) CMOVC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVC", 2, Operands { v0, v1 })
    // CMOVC r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVC m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVC r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVC m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVC r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVC m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVC")
    }
    return p
}

// CMOVE performs "Move if equal (ZF == 1)".
//
// Mnemonic : CMOVE
// Supported forms : (6 forms)
//
// * CMOVE r16, r16 [CMOV]
// * CMOVE m16, r16 [CMOV]
// * CMOVE r32, r32 [CMOV]
// * CMOVE m32, r32 [CMOV]
// * CMOVE r64, r64 [CMOV]
// * CMOVE m64, r64 [CMOV]
//
func (self *Program) CMOVE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVE", 2, Operands { v0, v1 })
    // CMOVE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVE")
    }
    return p
}
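
// Branchless select sketch (TESTQ and the register constants are assumed
// from the rest of this generated file): CMOVE and CMOVNE form a natural
// pair for picking one of two values on a single flag, replacing a forward
// branch.
//
//     p.TESTQ(RDI, RDI)    // ZF = (RDI == 0)
//     p.CMOVE(RSI, RAX)    // RAX = RSI when RDI == 0
//     p.CMOVNE(RDX, RAX)   // RAX = RDX otherwise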

// CMOVG performs "Move if greater (ZF == 0 and SF == OF)".
//
// Mnemonic : CMOVG
// Supported forms : (6 forms)
//
// * CMOVG r16, r16 [CMOV]
// * CMOVG m16, r16 [CMOV]
// * CMOVG r32, r32 [CMOV]
// * CMOVG m32, r32 [CMOV]
// * CMOVG r64, r64 [CMOV]
// * CMOVG m64, r64 [CMOV]
//
func (self *Program) CMOVG(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVG", 2, Operands { v0, v1 })
    // CMOVG r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVG m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVG r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVG m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVG r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVG m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVG")
    }
    return p
}

// CMOVGE performs "Move if greater or equal (SF == OF)".
//
// Mnemonic : CMOVGE
// Supported forms : (6 forms)
//
// * CMOVGE r16, r16 [CMOV]
// * CMOVGE m16, r16 [CMOV]
// * CMOVGE r32, r32 [CMOV]
// * CMOVGE m32, r32 [CMOV]
// * CMOVGE r64, r64 [CMOV]
// * CMOVGE m64, r64 [CMOV]
//
func (self *Program) CMOVGE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVGE", 2, Operands { v0, v1 })
    // CMOVGE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVGE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVGE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVGE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVGE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVGE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVGE")
    }
    return p
}

// CMOVL performs "Move if less (SF != OF)".
//
// Mnemonic : CMOVL
// Supported forms : (6 forms)
//
// * CMOVL r16, r16 [CMOV]
// * CMOVL m16, r16 [CMOV]
// * CMOVL r32, r32 [CMOV]
// * CMOVL m32, r32 [CMOV]
// * CMOVL r64, r64 [CMOV]
// * CMOVL m64, r64 [CMOV]
//
func (self *Program) CMOVL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVL", 2, Operands { v0, v1 })
    // CMOVL r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVL m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVL r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVL m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVL")
    }
    return p
}

// CMOVLE performs "Move if less or equal (ZF == 1 or SF != OF)".
//
// Mnemonic : CMOVLE
// Supported forms : (6 forms)
//
// * CMOVLE r16, r16 [CMOV]
// * CMOVLE m16, r16 [CMOV]
// * CMOVLE r32, r32 [CMOV]
// * CMOVLE m32, r32 [CMOV]
// * CMOVLE r64, r64 [CMOV]
// * CMOVLE m64, r64 [CMOV]
//
func (self *Program) CMOVLE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVLE", 2, Operands { v0, v1 })
    // CMOVLE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVLE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVLE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVLE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVLE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVLE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVLE")
    }
    return p
}

// CMOVNA performs "Move if not above (CF == 1 or ZF == 1)".
//
// Mnemonic : CMOVNA
// Supported forms : (6 forms)
//
// * CMOVNA r16, r16 [CMOV]
// * CMOVNA m16, r16 [CMOV]
// * CMOVNA r32, r32 [CMOV]
// * CMOVNA m32, r32 [CMOV]
// * CMOVNA r64, r64 [CMOV]
// * CMOVNA m64, r64 [CMOV]
//
func (self *Program) CMOVNA(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNA", 2, Operands { v0, v1 })
    // CMOVNA r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNA m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x46)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNA r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNA m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x46)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNA r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNA m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x46)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNA")
    }
    return p
}

// CMOVNAE performs "Move if not above or equal (CF == 1)".
//
// Mnemonic : CMOVNAE
// Supported forms : (6 forms)
//
// * CMOVNAE r16, r16 [CMOV]
// * CMOVNAE m16, r16 [CMOV]
// * CMOVNAE r32, r32 [CMOV]
// * CMOVNAE m32, r32 [CMOV]
// * CMOVNAE r64, r64 [CMOV]
// * CMOVNAE m64, r64 [CMOV]
//
func (self *Program) CMOVNAE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNAE", 2, Operands { v0, v1 })
    // CMOVNAE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNAE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNAE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNAE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNAE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNAE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNAE")
    }
    return p
}

// CMOVNB performs "Move if not below (CF == 0)".
//
// Mnemonic : CMOVNB
// Supported forms : (6 forms)
//
// * CMOVNB r16, r16 [CMOV]
// * CMOVNB m16, r16 [CMOV]
// * CMOVNB r32, r32 [CMOV]
// * CMOVNB m32, r32 [CMOV]
// * CMOVNB r64, r64 [CMOV]
// * CMOVNB m64, r64 [CMOV]
//
func (self *Program) CMOVNB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNB", 2, Operands { v0, v1 })
    // CMOVNB r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNB m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNB r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNB m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNB r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNB m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNB")
    }
    return p
}

// CMOVNBE performs "Move if not below or equal (CF == 0 and ZF == 0)".
//
// Mnemonic : CMOVNBE
// Supported forms : (6 forms)
//
// * CMOVNBE r16, r16 [CMOV]
// * CMOVNBE m16, r16 [CMOV]
// * CMOVNBE r32, r32 [CMOV]
// * CMOVNBE m32, r32 [CMOV]
// * CMOVNBE r64, r64 [CMOV]
// * CMOVNBE m64, r64 [CMOV]
//
func (self *Program) CMOVNBE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNBE", 2, Operands { v0, v1 })
    // CMOVNBE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNBE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x47)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNBE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNBE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x47)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNBE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNBE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x47)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNBE")
    }
    return p
}

// CMOVNC performs "Move if not carry (CF == 0)".
//
// Mnemonic : CMOVNC
// Supported forms : (6 forms)
//
// * CMOVNC r16, r16 [CMOV]
// * CMOVNC m16, r16 [CMOV]
// * CMOVNC r32, r32 [CMOV]
// * CMOVNC m32, r32 [CMOV]
// * CMOVNC r64, r64 [CMOV]
// * CMOVNC m64, r64 [CMOV]
//
func (self *Program) CMOVNC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNC", 2, Operands { v0, v1 })
    // CMOVNC r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNC m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNC r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNC m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNC r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNC m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x43)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNC")
    }
    return p
}

// CMOVNE performs "Move if not equal (ZF == 0)".
//
// Mnemonic : CMOVNE
// Supported forms : (6 forms)
//
// * CMOVNE r16, r16 [CMOV]
// * CMOVNE m16, r16 [CMOV]
// * CMOVNE r32, r32 [CMOV]
// * CMOVNE m32, r32 [CMOV]
// * CMOVNE r64, r64 [CMOV]
// * CMOVNE m64, r64 [CMOV]
//
func (self *Program) CMOVNE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNE", 2, Operands { v0, v1 })
    // CMOVNE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x45)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x45)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x45)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNE")
    }
    return p
}

// CMOVNG performs "Move if not greater (ZF == 1 or SF != OF)".
//
// Mnemonic : CMOVNG
// Supported forms : (6 forms)
//
// * CMOVNG r16, r16 [CMOV]
// * CMOVNG m16, r16 [CMOV]
// * CMOVNG r32, r32 [CMOV]
// * CMOVNG m32, r32 [CMOV]
// * CMOVNG r64, r64 [CMOV]
// * CMOVNG m64, r64 [CMOV]
//
func (self *Program) CMOVNG(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNG", 2, Operands { v0, v1 })
    // CMOVNG r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNG m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNG r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNG m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNG r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNG m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNG")
    }
    return p
}

// CMOVNGE performs "Move if not greater or equal (SF != OF)".
//
// Mnemonic : CMOVNGE
// Supported forms : (6 forms)
//
// * CMOVNGE r16, r16 [CMOV]
// * CMOVNGE m16, r16 [CMOV]
// * CMOVNGE r32, r32 [CMOV]
// * CMOVNGE m32, r32 [CMOV]
// * CMOVNGE r64, r64 [CMOV]
// * CMOVNGE m64, r64 [CMOV]
//
func (self *Program) CMOVNGE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNGE", 2, Operands { v0, v1 })
    // CMOVNGE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNGE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNGE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNGE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNGE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNGE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNGE")
    }
    return p
}

// CMOVNL performs "Move if not less (SF == OF)".
//
// Mnemonic : CMOVNL
// Supported forms : (6 forms)
//
// * CMOVNL r16, r16 [CMOV]
// * CMOVNL m16, r16 [CMOV]
// * CMOVNL r32, r32 [CMOV]
// * CMOVNL m32, r32 [CMOV]
// * CMOVNL r64, r64 [CMOV]
// * CMOVNL m64, r64 [CMOV]
//
func (self *Program) CMOVNL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNL", 2, Operands { v0, v1 })
    // CMOVNL r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNL m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNL r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNL m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNL")
    }
    return p
}

// CMOVNLE performs "Move if not less or equal (ZF == 0 and SF == OF)".
//
// Mnemonic : CMOVNLE
// Supported forms : (6 forms)
//
// * CMOVNLE r16, r16 [CMOV]
// * CMOVNLE m16, r16 [CMOV]
// * CMOVNLE r32, r32 [CMOV]
// * CMOVNLE m32, r32 [CMOV]
// * CMOVNLE r64, r64 [CMOV]
// * CMOVNLE m64, r64 [CMOV]
//
func (self *Program) CMOVNLE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNLE", 2, Operands { v0, v1 })
    // CMOVNLE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNLE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNLE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNLE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNLE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNLE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNLE")
    }
    return p
}

// CMOVNO performs "Move if not overflow (OF == 0)".
//
// Mnemonic : CMOVNO
// Supported forms : (6 forms)
//
// * CMOVNO r16, r16 [CMOV]
// * CMOVNO m16, r16 [CMOV]
// * CMOVNO r32, r32 [CMOV]
// * CMOVNO m32, r32 [CMOV]
// * CMOVNO r64, r64 [CMOV]
// * CMOVNO m64, r64 [CMOV]
//
func (self *Program) CMOVNO(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNO", 2, Operands { v0, v1 })
    // CMOVNO r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNO m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x41)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNO r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNO m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x41)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNO r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNO m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x41)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNO")
    }
    return p
}

// CMOVNP performs "Move if not parity (PF == 0)".
//
// Mnemonic : CMOVNP
// Supported forms : (6 forms)
//
// * CMOVNP r16, r16 [CMOV]
// * CMOVNP m16, r16 [CMOV]
// * CMOVNP r32, r32 [CMOV]
// * CMOVNP m32, r32 [CMOV]
// * CMOVNP r64, r64 [CMOV]
// * CMOVNP m64, r64 [CMOV]
//
func (self *Program) CMOVNP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNP", 2, Operands { v0, v1 })
    // CMOVNP r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNP m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNP r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNP m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNP r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNP m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNP")
    }
    return p
}

// CMOVNS performs "Move if not sign (SF == 0)".
//
// Mnemonic : CMOVNS
// Supported forms : (6 forms)
//
// * CMOVNS r16, r16 [CMOV]
// * CMOVNS m16, r16 [CMOV]
// * CMOVNS r32, r32 [CMOV]
// * CMOVNS m32, r32 [CMOV]
// * CMOVNS r64, r64 [CMOV]
// * CMOVNS m64, r64 [CMOV]
//
func (self *Program) CMOVNS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVNS", 2, Operands { v0, v1 })
    // CMOVNS r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNS m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x49)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNS r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNS m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x49)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVNS r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVNS m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x49)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVNS")
    }
    return p
}
// CMOVNZ performs "Move if not zero (ZF == 0)".
|
|
//
|
|
// Mnemonic : CMOVNZ
|
|
// Supported forms : (6 forms)
|
|
//
|
|
// * CMOVNZ r16, r16 [CMOV]
|
|
// * CMOVNZ m16, r16 [CMOV]
|
|
// * CMOVNZ r32, r32 [CMOV]
|
|
// * CMOVNZ m32, r32 [CMOV]
|
|
// * CMOVNZ r64, r64 [CMOV]
|
|
// * CMOVNZ m64, r64 [CMOV]
|
|
//
|
|
func (self *Program) CMOVNZ(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("CMOVNZ", 2, Operands { v0, v1 })
|
|
// CMOVNZ r16, r16
|
|
if isReg16(v0) && isReg16(v1) {
|
|
self.require(ISA_CMOV)
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x45)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// CMOVNZ m16, r16
|
|
if isM16(v0) && isReg16(v1) {
|
|
self.require(ISA_CMOV)
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x45)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// CMOVNZ r32, r32
|
|
if isReg32(v0) && isReg32(v1) {
|
|
self.require(ISA_CMOV)
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x45)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// CMOVNZ m32, r32
|
|
if isM32(v0) && isReg32(v1) {
|
|
self.require(ISA_CMOV)
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x45)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// CMOVNZ r64, r64
|
|
if isReg64(v0) && isReg64(v1) {
|
|
self.require(ISA_CMOV)
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
|
|
m.emit(0x0f)
|
|
m.emit(0x45)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// CMOVNZ m64, r64
|
|
if isM64(v0) && isReg64(v1) {
|
|
self.require(ISA_CMOV)
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexm(1, hcode(v[1]), addr(v[0]))
|
|
m.emit(0x0f)
|
|
m.emit(0x45)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for CMOVNZ")
|
|
}
|
|
return p
|
|
}
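
// A minimal usage sketch (illustrative, not generated): CMOVNZ enables a
// branchless select. It assumes a *Program value `p`, the register
// constants from this package, and the CMPQ helper defined later in this
// file; passing a plain Go integer as the immediate is assumed to satisfy
// the imm8 check.
//
//     p.CMPQ(0, RBX)      // ZF = (RBX == 0)
//     p.CMOVNZ(RCX, RAX)  // RAX = RCX only when RBX != 0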

// CMOVO performs "Move if overflow (OF == 1)".
//
// Mnemonic : CMOVO
// Supported forms : (6 forms)
//
// * CMOVO r16, r16 [CMOV]
// * CMOVO m16, r16 [CMOV]
// * CMOVO r32, r32 [CMOV]
// * CMOVO m32, r32 [CMOV]
// * CMOVO r64, r64 [CMOV]
// * CMOVO m64, r64 [CMOV]
//
func (self *Program) CMOVO(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVO", 2, Operands { v0, v1 })
    // CMOVO r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVO m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x40)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVO r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVO m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x40)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVO r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVO m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x40)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVO")
    }
    return p
}

// CMOVP performs "Move if parity (PF == 1)".
//
// Mnemonic : CMOVP
// Supported forms : (6 forms)
//
// * CMOVP r16, r16 [CMOV]
// * CMOVP m16, r16 [CMOV]
// * CMOVP r32, r32 [CMOV]
// * CMOVP m32, r32 [CMOV]
// * CMOVP r64, r64 [CMOV]
// * CMOVP m64, r64 [CMOV]
//
func (self *Program) CMOVP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVP", 2, Operands { v0, v1 })
    // CMOVP r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVP m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVP r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVP m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVP r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVP m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVP")
    }
    return p
}

// CMOVPE performs "Move if parity even (PF == 1)".
//
// Mnemonic : CMOVPE
// Supported forms : (6 forms)
//
// * CMOVPE r16, r16 [CMOV]
// * CMOVPE m16, r16 [CMOV]
// * CMOVPE r32, r32 [CMOV]
// * CMOVPE m32, r32 [CMOV]
// * CMOVPE r64, r64 [CMOV]
// * CMOVPE m64, r64 [CMOV]
//
func (self *Program) CMOVPE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVPE", 2, Operands { v0, v1 })
    // CMOVPE r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVPE m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVPE r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVPE m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVPE r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVPE m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVPE")
    }
    return p
}

// CMOVPO performs "Move if parity odd (PF == 0)".
//
// Mnemonic : CMOVPO
// Supported forms : (6 forms)
//
// * CMOVPO r16, r16 [CMOV]
// * CMOVPO m16, r16 [CMOV]
// * CMOVPO r32, r32 [CMOV]
// * CMOVPO m32, r32 [CMOV]
// * CMOVPO r64, r64 [CMOV]
// * CMOVPO m64, r64 [CMOV]
//
func (self *Program) CMOVPO(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVPO", 2, Operands { v0, v1 })
    // CMOVPO r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVPO m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVPO r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVPO m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x4b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVPO r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVPO m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x4b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVPO")
    }
    return p
}

// CMOVS performs "Move if sign (SF == 1)".
//
// Mnemonic : CMOVS
// Supported forms : (6 forms)
//
// * CMOVS r16, r16 [CMOV]
// * CMOVS m16, r16 [CMOV]
// * CMOVS r32, r32 [CMOV]
// * CMOVS m32, r32 [CMOV]
// * CMOVS r64, r64 [CMOV]
// * CMOVS m64, r64 [CMOV]
//
func (self *Program) CMOVS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVS", 2, Operands { v0, v1 })
    // CMOVS r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVS m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x48)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVS r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVS m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x48)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVS r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVS m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x48)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVS")
    }
    return p
}

// CMOVZ performs "Move if zero (ZF == 1)".
//
// Mnemonic : CMOVZ
// Supported forms : (6 forms)
//
// * CMOVZ r16, r16 [CMOV]
// * CMOVZ m16, r16 [CMOV]
// * CMOVZ r32, r32 [CMOV]
// * CMOVZ m32, r32 [CMOV]
// * CMOVZ r64, r64 [CMOV]
// * CMOVZ m64, r64 [CMOV]
//
func (self *Program) CMOVZ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMOVZ", 2, Operands { v0, v1 })
    // CMOVZ r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVZ m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVZ r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVZ m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMOVZ r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMOVZ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_CMOV)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMOVZ")
    }
    return p
}

// CMPB performs "Compare Two Operands".
//
// Mnemonic : CMP
// Supported forms : (6 forms)
//
// * CMPB imm8, al
// * CMPB imm8, r8
// * CMPB r8, r8
// * CMPB m8, r8
// * CMPB imm8, m8
// * CMPB r8, m8
//
func (self *Program) CMPB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPB", 2, Operands { v0, v1 })
    // CMPB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x3c)
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMPB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x3a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMPB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x38)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPB")
    }
    return p
}

// CMPL performs "Compare Two Operands".
//
// Mnemonic : CMP
// Supported forms : (8 forms)
//
// * CMPL imm32, eax
// * CMPL imm8, r32
// * CMPL imm32, r32
// * CMPL r32, r32
// * CMPL m32, r32
// * CMPL imm8, m32
// * CMPL imm32, m32
// * CMPL r32, m32
//
func (self *Program) CMPL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPL", 2, Operands { v0, v1 })
    // CMPL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x3d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // CMPL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xf8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // CMPL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMPL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x3b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMPL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(7, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // CMPL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x39)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPL")
    }
    return p
}

// CMPPD performs "Compare Packed Double-Precision Floating-Point Values".
//
// Mnemonic : CMPPD
// Supported forms : (2 forms)
//
// * CMPPD imm8, xmm, xmm [SSE2]
// * CMPPD imm8, m128, xmm [SSE2]
//
func (self *Program) CMPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("CMPPD", 3, Operands { v0, v1, v2 })
    // CMPPD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPPD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPPD")
    }
    return p
}

// CMPPS performs "Compare Packed Single-Precision Floating-Point Values".
//
// Mnemonic : CMPPS
// Supported forms : (2 forms)
//
// * CMPPS imm8, xmm, xmm [SSE]
// * CMPPS imm8, m128, xmm [SSE]
//
func (self *Program) CMPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("CMPPS", 3, Operands { v0, v1, v2 })
    // CMPPS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPPS imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPPS")
    }
    return p
}
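
// Note on the imm8 operand (informational, not generated): for CMPPS and
// CMPPD above, and the scalar CMPSS/CMPSD below, the immediate selects the
// comparison predicate: 0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ,
// 5 = NLT, 6 = NLE, 7 = ORD. A sketch, assuming a *Program value `p` and
// this package's XMM register constants:
//
//     p.CMPPS(2, XMM1, XMM0)  // XMM0 = per-lane all-ones mask where XMM0 <= XMM1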

// CMPQ performs "Compare Two Operands".
//
// Mnemonic : CMP
// Supported forms : (8 forms)
//
// * CMPQ imm32, rax
// * CMPQ imm8, r64
// * CMPQ imm32, r64
// * CMPQ r64, r64
// * CMPQ m64, r64
// * CMPQ imm8, m64
// * CMPQ imm32, m64
// * CMPQ r64, m64
//
func (self *Program) CMPQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPQ", 2, Operands { v0, v1 })
    // CMPQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x3d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // CMPQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xf8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // CMPQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMPQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMPQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(7, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // CMPQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x39)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPQ")
    }
    return p
}
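
// A minimal sketch (illustrative, not generated): CMPQ only sets EFLAGS,
// so it is paired with a conditional instruction such as the CMOVB helper
// generated earlier in this file.
//
//     p.CMPQ(RBX, RAX)   // flags = RAX - RBX
//     p.CMOVB(RBX, RAX)  // RAX = min(RAX, RBX), treating both as unsigned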

// CMPSD performs "Compare Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : CMPSD
// Supported forms : (2 forms)
//
// * CMPSD imm8, xmm, xmm [SSE2]
// * CMPSD imm8, m64, xmm [SSE2]
//
func (self *Program) CMPSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("CMPSD", 3, Operands { v0, v1, v2 })
    // CMPSD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPSD imm8, m64, xmm
    if isImm8(v0) && isM64(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPSD")
    }
    return p
}

// CMPSS performs "Compare Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : CMPSS
// Supported forms : (2 forms)
//
// * CMPSS imm8, xmm, xmm [SSE]
// * CMPSS imm8, m32, xmm [SSE]
//
func (self *Program) CMPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("CMPSS", 3, Operands { v0, v1, v2 })
    // CMPSS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPSS imm8, m32, xmm
    if isImm8(v0) && isM32(v1) && isXMM(v2) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc2)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPSS")
    }
    return p
}

// CMPW performs "Compare Two Operands".
//
// Mnemonic : CMP
// Supported forms : (8 forms)
//
// * CMPW imm16, ax
// * CMPW imm8, r16
// * CMPW imm16, r16
// * CMPW r16, r16
// * CMPW m16, r16
// * CMPW imm8, m16
// * CMPW imm16, m16
// * CMPW r16, m16
//
func (self *Program) CMPW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPW", 2, Operands { v0, v1 })
    // CMPW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x3d)
            m.imm2(toImmAny(v[0]))
        })
    }
    // CMPW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xf8 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // CMPW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CMPW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x3b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CMPW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // CMPW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(7, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // CMPW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x39)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPW")
    }
    return p
}

// CMPXCHG16B performs "Compare and Exchange 16 Bytes".
//
// Mnemonic : CMPXCHG16B
// Supported forms : (1 form)
//
// * CMPXCHG16B m128
//
func (self *Program) CMPXCHG16B(v0 interface{}) *Instruction {
    p := self.alloc("CMPXCHG16B", 1, Operands { v0 })
    // CMPXCHG16B m128
    if isM128(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0x0f)
            m.emit(0xc7)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPXCHG16B")
    }
    return p
}
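
// Usage note (informational, not generated): CMPXCHG16B compares RDX:RAX
// with the 16 bytes at the destination and, on a match, stores RCX:RBX
// there; the memory operand must be 16-byte aligned, and atomicity
// additionally needs a LOCK prefix, which this sketch omits. The Ptr
// memory-operand constructor is assumed to exist in this package.
//
//     p.CMPXCHG16B(Ptr(RDI, 0))  // CAS of the 16 bytes at [RDI]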

// CMPXCHG8B performs "Compare and Exchange 8 Bytes".
//
// Mnemonic : CMPXCHG8B
// Supported forms : (1 form)
//
// * CMPXCHG8B m64
//
func (self *Program) CMPXCHG8B(v0 interface{}) *Instruction {
    p := self.alloc("CMPXCHG8B", 1, Operands { v0 })
    // CMPXCHG8B m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xc7)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPXCHG8B")
    }
    return p
}

// CMPXCHGB performs "Compare and Exchange".
//
// Mnemonic : CMPXCHG
// Supported forms : (2 forms)
//
// * CMPXCHGB r8, r8
// * CMPXCHGB r8, m8
//
func (self *Program) CMPXCHGB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPXCHGB", 2, Operands { v0, v1 })
    // CMPXCHGB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x0f)
            m.emit(0xb0)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // CMPXCHGB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0xb0)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPXCHGB")
    }
    return p
}

// CMPXCHGL performs "Compare and Exchange".
//
// Mnemonic : CMPXCHG
// Supported forms : (2 forms)
//
// * CMPXCHGL r32, r32
// * CMPXCHGL r32, m32
//
func (self *Program) CMPXCHGL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPXCHGL", 2, Operands { v0, v1 })
    // CMPXCHGL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xb1)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // CMPXCHGL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xb1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPXCHGL")
    }
    return p
}

// CMPXCHGQ performs "Compare and Exchange".
//
// Mnemonic : CMPXCHG
// Supported forms : (2 forms)
//
// * CMPXCHGQ r64, r64
// * CMPXCHGQ r64, m64
//
func (self *Program) CMPXCHGQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPXCHGQ", 2, Operands { v0, v1 })
    // CMPXCHGQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xb1)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // CMPXCHGQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xb1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPXCHGQ")
    }
    return p
}
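
// A minimal compare-and-swap sketch (illustrative, not generated):
// CMPXCHGQ compares RAX with the destination; on a match it stores the
// source, otherwise it loads the destination into RAX, so a retry loop can
// reuse RAX directly. The Ptr constructor and the MOVQ helper are assumed
// to exist elsewhere in this package.
//
//     p.MOVQ(Ptr(RDI, 0), RAX)      // RAX = expected value
//     p.MOVQ(RAX, RCX)              // derive the new value in RCX (hypothetical)
//     p.CMPXCHGQ(RCX, Ptr(RDI, 0))  // try the swap; ZF == 1 on success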

// CMPXCHGW performs "Compare and Exchange".
//
// Mnemonic : CMPXCHG
// Supported forms : (2 forms)
//
// * CMPXCHGW r16, r16
// * CMPXCHGW r16, m16
//
func (self *Program) CMPXCHGW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CMPXCHGW", 2, Operands { v0, v1 })
    // CMPXCHGW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xb1)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // CMPXCHGW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xb1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CMPXCHGW")
    }
    return p
}

// COMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : COMISD
// Supported forms : (2 forms)
//
// * COMISD xmm, xmm [SSE2]
// * COMISD m64, xmm [SSE2]
//
func (self *Program) COMISD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("COMISD", 2, Operands { v0, v1 })
    // COMISD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // COMISD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for COMISD")
    }
    return p
}
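
// Usage note (informational, not generated): COMISD writes its result into
// ZF/PF/CF the way an unsigned integer compare would, with PF == 1
// flagging an unordered (NaN) operand, so the unsigned Jcc/SETcc family
// applies. A sketch, assuming the SETA helper generated later in this file:
//
//     p.COMISD(XMM1, XMM0)  // flags from comparing XMM0 with XMM1
//     p.SETA(AL)            // AL = 1 when XMM0 > XMM1 and neither is NaN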

// COMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : COMISS
// Supported forms : (2 forms)
//
// * COMISS xmm, xmm [SSE]
// * COMISS m32, xmm [SSE]
//
func (self *Program) COMISS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("COMISS", 2, Operands { v0, v1 })
    // COMISS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // COMISS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for COMISS")
    }
    return p
}

// CPUID performs "CPU Identification".
//
// Mnemonic : CPUID
// Supported forms : (1 form)
//
// * CPUID [CPUID]
//
func (self *Program) CPUID() *Instruction {
    p := self.alloc("CPUID", 0, Operands { })
    // CPUID
    self.require(ISA_CPUID)
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0xa2)
    })
    return p
}
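
// A minimal sketch (illustrative, not generated): CPUID reads the leaf
// from EAX (and the sub-leaf from ECX) and overwrites EAX/EBX/ECX/EDX.
// The MOVL helper is assumed to be generated elsewhere in this file.
//
//     p.MOVL(1, EAX)  // leaf 1: processor info and feature bits
//     p.CPUID()       // feature flags land in ECX and EDX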

// CQTO performs "Convert Quadword to Octaword".
//
// Mnemonic : CQO
// Supported forms : (1 form)
//
// * CQTO
//
func (self *Program) CQTO() *Instruction {
    p := self.alloc("CQTO", 0, Operands { })
    // CQTO
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x48)
        m.emit(0x99)
    })
    return p
}
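
// Usage note (informational, not generated): CQTO sign-extends RAX into
// RDX:RAX, the required setup before a signed 64-bit divide. A sketch,
// assuming the IDIVQ helper generated elsewhere in this file:
//
//     p.CQTO()      // RDX:RAX = sign-extend(RAX)
//     p.IDIVQ(RBX)  // RAX = quotient, RDX = remainder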

// CRC32B performs "Accumulate CRC32 Value".
//
// Mnemonic : CRC32
// Supported forms : (4 forms)
//
// * CRC32B r8, r32 [SSE4.2]
// * CRC32B m8, r32 [SSE4.2]
// * CRC32B r8, r64 [SSE4.2]
// * CRC32B m8, r64 [SSE4.2]
//
func (self *Program) CRC32B(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CRC32B", 2, Operands { v0, v1 })
    // CRC32B r8, r32
    if isReg8(v0) && isReg32(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CRC32B m8, r32
    if isM8(v0) && isReg32(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CRC32B r8, r64
    if isReg8(v0) && isReg64(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CRC32B m8, r64
    if isM8(v0) && isReg64(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CRC32B")
    }
    return p
}

// CRC32L performs "Accumulate CRC32 Value".
//
// Mnemonic : CRC32
// Supported forms : (2 forms)
//
// * CRC32L r32, r32 [SSE4.2]
// * CRC32L m32, r32 [SSE4.2]
//
func (self *Program) CRC32L(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CRC32L", 2, Operands { v0, v1 })
    // CRC32L r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CRC32L m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CRC32L")
    }
    return p
}

// CRC32Q performs "Accumulate CRC32 Value".
//
// Mnemonic : CRC32
// Supported forms : (2 forms)
//
// * CRC32Q r64, r64 [SSE4.2]
// * CRC32Q m64, r64 [SSE4.2]
//
func (self *Program) CRC32Q(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CRC32Q", 2, Operands { v0, v1 })
    // CRC32Q r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CRC32Q m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CRC32Q")
    }
    return p
}
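
// Usage note (informational, not generated): the CRC32 family accumulates
// the Castagnoli polynomial (CRC-32C), not the zlib CRC-32, and the
// destination register carries the running state between steps.
//
//     p.CRC32Q(RSI, RAX)  // fold the 8 bytes in RSI into the CRC state in RAX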

// CRC32W performs "Accumulate CRC32 Value".
//
// Mnemonic : CRC32
// Supported forms : (2 forms)
//
// * CRC32W r16, r32 [SSE4.2]
// * CRC32W m16, r32 [SSE4.2]
//
func (self *Program) CRC32W(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CRC32W", 2, Operands { v0, v1 })
    // CRC32W r16, r32
    if isReg16(v0) && isReg32(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CRC32W m16, r32
    if isM16(v0) && isReg32(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CRC32W")
    }
    return p
}

// CVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values".
//
// Mnemonic : CVTDQ2PD
// Supported forms : (2 forms)
//
// * CVTDQ2PD xmm, xmm [SSE2]
// * CVTDQ2PD m64, xmm [SSE2]
//
func (self *Program) CVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTDQ2PD", 2, Operands { v0, v1 })
    // CVTDQ2PD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTDQ2PD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTDQ2PD")
    }
    return p
}

// CVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values".
//
// Mnemonic : CVTDQ2PS
// Supported forms : (2 forms)
//
// * CVTDQ2PS xmm, xmm [SSE2]
// * CVTDQ2PS m128, xmm [SSE2]
//
func (self *Program) CVTDQ2PS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTDQ2PS", 2, Operands { v0, v1 })
    // CVTDQ2PS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTDQ2PS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTDQ2PS")
    }
    return p
}

// CVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTPD2DQ
// Supported forms : (2 forms)
//
// * CVTPD2DQ xmm, xmm [SSE2]
// * CVTPD2DQ m128, xmm [SSE2]
//
func (self *Program) CVTPD2DQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPD2DQ", 2, Operands { v0, v1 })
    // CVTPD2DQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPD2DQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPD2DQ")
    }
    return p
}

// CVTPD2PI performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTPD2PI
// Supported forms : (2 forms)
//
// * CVTPD2PI xmm, mm [SSE]
// * CVTPD2PI m128, mm [SSE]
//
func (self *Program) CVTPD2PI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPD2PI", 2, Operands { v0, v1 })
    // CVTPD2PI xmm, mm
    if isXMM(v0) && isMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPD2PI m128, mm
    if isM128(v0) && isMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPD2PI")
    }
    return p
}

// CVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values".
//
// Mnemonic : CVTPD2PS
// Supported forms : (2 forms)
//
// * CVTPD2PS xmm, xmm [SSE2]
// * CVTPD2PS m128, xmm [SSE2]
//
func (self *Program) CVTPD2PS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPD2PS", 2, Operands { v0, v1 })
    // CVTPD2PS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPD2PS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPD2PS")
    }
    return p
}

// CVTPI2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values".
//
// Mnemonic : CVTPI2PD
// Supported forms : (2 forms)
//
// * CVTPI2PD mm, xmm [SSE2]
// * CVTPI2PD m64, xmm [SSE2]
//
func (self *Program) CVTPI2PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPI2PD", 2, Operands { v0, v1 })
    // CVTPI2PD mm, xmm
    if isMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPI2PD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPI2PD")
    }
    return p
}

// CVTPI2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values".
//
// Mnemonic : CVTPI2PS
// Supported forms : (2 forms)
//
// * CVTPI2PS mm, xmm [SSE]
// * CVTPI2PS m64, xmm [SSE]
//
func (self *Program) CVTPI2PS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPI2PS", 2, Operands { v0, v1 })
    // CVTPI2PS mm, xmm
    if isMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPI2PS m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPI2PS")
    }
    return p
}

// CVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTPS2DQ
// Supported forms : (2 forms)
//
// * CVTPS2DQ xmm, xmm [SSE2]
// * CVTPS2DQ m128, xmm [SSE2]
//
func (self *Program) CVTPS2DQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPS2DQ", 2, Operands { v0, v1 })
    // CVTPS2DQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPS2DQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPS2DQ")
    }
    return p
}

// CVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values".
//
// Mnemonic : CVTPS2PD
// Supported forms : (2 forms)
//
// * CVTPS2PD xmm, xmm [SSE2]
// * CVTPS2PD m64, xmm [SSE2]
//
func (self *Program) CVTPS2PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPS2PD", 2, Operands { v0, v1 })
    // CVTPS2PD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPS2PD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPS2PD")
    }
    return p
}

// CVTPS2PI performs "Convert Packed Single-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTPS2PI
// Supported forms : (2 forms)
//
// * CVTPS2PI xmm, mm [SSE]
// * CVTPS2PI m64, mm [SSE]
//
func (self *Program) CVTPS2PI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTPS2PI", 2, Operands { v0, v1 })
    // CVTPS2PI xmm, mm
    if isXMM(v0) && isMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTPS2PI m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTPS2PI")
    }
    return p
}

// CVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer".
//
// Mnemonic : CVTSD2SI
// Supported forms : (4 forms)
//
// * CVTSD2SI xmm, r32 [SSE2]
// * CVTSD2SI m64, r32 [SSE2]
// * CVTSD2SI xmm, r64 [SSE2]
// * CVTSD2SI m64, r64 [SSE2]
//
func (self *Program) CVTSD2SI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTSD2SI", 2, Operands { v0, v1 })
    // CVTSD2SI xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSD2SI m64, r32
    if isM64(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CVTSD2SI xmm, r64
    if isXMM(v0) && isReg64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSD2SI m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTSD2SI")
    }
    return p
}
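
// Usage note (informational, not generated): CVTSD2SI rounds according to
// the current MXCSR rounding mode (round-to-nearest-even by default); the
// truncating variant is CVTTSD2SI, generated elsewhere in this file.
//
//     p.CVTSD2SI(XMM0, RAX)  // RAX = int64(round(low double of XMM0))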

// CVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value".
//
// Mnemonic : CVTSD2SS
// Supported forms : (2 forms)
//
// * CVTSD2SS xmm, xmm [SSE2]
// * CVTSD2SS m64, xmm [SSE2]
//
func (self *Program) CVTSD2SS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTSD2SS", 2, Operands { v0, v1 })
    // CVTSD2SS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSD2SS m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTSD2SS")
    }
    return p
}
|
|
|
|
// CVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value".
|
|
//
|
|
// Mnemonic : CVTSI2SD
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * CVTSI2SD r32, xmm [SSE2]
|
|
// * CVTSI2SD r64, xmm [SSE2]
|
|
// * CVTSI2SD m32, xmm [SSE2]
|
|
// * CVTSI2SD m64, xmm [SSE2]
|
|
//
|
|
func (self *Program) CVTSI2SD(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("CVTSI2SD", 2, Operands { v0, v1 })
|
|
// CVTSI2SD r32, xmm
|
|
if isReg32(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xf2)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x2a)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// CVTSI2SD r64, xmm
|
|
if isReg64(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xf2)
|
|
m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
|
|
m.emit(0x0f)
|
|
m.emit(0x2a)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// CVTSI2SD m32, xmm
|
|
if isM32(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xf2)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x2a)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// CVTSI2SD m64, xmm
|
|
if isM64(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xf2)
|
|
m.rexm(1, hcode(v[1]), addr(v[0]))
|
|
m.emit(0x0f)
|
|
m.emit(0x2a)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for CVTSI2SD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
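// Usage sketch (illustrative; register constant names are assumed):
//
//     p.CVTSI2SD(EAX, XMM0)   // int32 -> float64
//     p.CVTSI2SD(RAX, XMM0)   // int64 -> float64, REX.W-encoded as above
//
// A memory source carries no width of its own, which is why distinct m32 and
// m64 forms exist and dispatch on isM32/isM64 above.
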
// CVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value".
//
// Mnemonic : CVTSI2SS
// Supported forms : (4 forms)
//
// * CVTSI2SS r32, xmm [SSE]
// * CVTSI2SS r64, xmm [SSE]
// * CVTSI2SS m32, xmm [SSE]
// * CVTSI2SS m64, xmm [SSE]
//
func (self *Program) CVTSI2SS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTSI2SS", 2, Operands { v0, v1 })
    // CVTSI2SS r32, xmm
    if isReg32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSI2SS r64, xmm
    if isReg64(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSI2SS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CVTSI2SS m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTSI2SS")
    }
    return p
}

// CVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value".
//
// Mnemonic : CVTSS2SD
// Supported forms : (2 forms)
//
// * CVTSS2SD xmm, xmm [SSE2]
// * CVTSS2SD m32, xmm [SSE2]
//
func (self *Program) CVTSS2SD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTSS2SD", 2, Operands { v0, v1 })
    // CVTSS2SD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSS2SD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTSS2SD")
    }
    return p
}

// CVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer".
//
// Mnemonic : CVTSS2SI
// Supported forms : (4 forms)
//
// * CVTSS2SI xmm, r32 [SSE]
// * CVTSS2SI m32, r32 [SSE]
// * CVTSS2SI xmm, r64 [SSE]
// * CVTSS2SI m32, r64 [SSE]
//
func (self *Program) CVTSS2SI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTSS2SI", 2, Operands { v0, v1 })
    // CVTSS2SI xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSS2SI m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CVTSS2SI xmm, r64
    if isXMM(v0) && isReg64(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTSS2SI m32, r64
    if isM32(v0) && isReg64(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTSS2SI")
    }
    return p
}

// CVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTTPD2DQ
// Supported forms : (2 forms)
//
// * CVTTPD2DQ xmm, xmm [SSE2]
// * CVTTPD2DQ m128, xmm [SSE2]
//
func (self *Program) CVTTPD2DQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTTPD2DQ", 2, Operands { v0, v1 })
    // CVTTPD2DQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTPD2DQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTTPD2DQ")
    }
    return p
}

// CVTTPD2PI performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTTPD2PI
// Supported forms : (2 forms)
//
// * CVTTPD2PI xmm, mm [SSE2]
// * CVTTPD2PI m128, mm [SSE2]
//
func (self *Program) CVTTPD2PI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTTPD2PI", 2, Operands { v0, v1 })
    // CVTTPD2PI xmm, mm
    if isXMM(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTPD2PI m128, mm
    if isM128(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTTPD2PI")
    }
    return p
}

// CVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTTPS2DQ
// Supported forms : (2 forms)
//
// * CVTTPS2DQ xmm, xmm [SSE2]
// * CVTTPS2DQ m128, xmm [SSE2]
//
func (self *Program) CVTTPS2DQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTTPS2DQ", 2, Operands { v0, v1 })
    // CVTTPS2DQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTPS2DQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTTPS2DQ")
    }
    return p
}

// CVTTPS2PI performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : CVTTPS2PI
// Supported forms : (2 forms)
//
// * CVTTPS2PI xmm, mm [SSE]
// * CVTTPS2PI m64, mm [SSE]
//
func (self *Program) CVTTPS2PI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTTPS2PI", 2, Operands { v0, v1 })
    // CVTTPS2PI xmm, mm
    if isXMM(v0) && isMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTPS2PI m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTTPS2PI")
    }
    return p
}

// CVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer".
//
// Mnemonic : CVTTSD2SI
// Supported forms : (4 forms)
//
// * CVTTSD2SI xmm, r32 [SSE2]
// * CVTTSD2SI m64, r32 [SSE2]
// * CVTTSD2SI xmm, r64 [SSE2]
// * CVTTSD2SI m64, r64 [SSE2]
//
func (self *Program) CVTTSD2SI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTTSD2SI", 2, Operands { v0, v1 })
    // CVTTSD2SI xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTSD2SI m64, r32
    if isM64(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CVTTSD2SI xmm, r64
    if isXMM(v0) && isReg64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTSD2SI m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTTSD2SI")
    }
    return p
}

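// Usage sketch (illustrative): the CVTT* variants truncate toward zero
// instead of rounding under the current MXCSR mode, which matches Go's own
// float-to-int conversion semantics:
//
//     p.CVTTSD2SI(XMM0, RAX)   // RAX = int64(trunc(float64 in XMM0))
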
// CVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer".
//
// Mnemonic : CVTTSS2SI
// Supported forms : (4 forms)
//
// * CVTTSS2SI xmm, r32 [SSE]
// * CVTTSS2SI m32, r32 [SSE]
// * CVTTSS2SI xmm, r64 [SSE]
// * CVTTSS2SI m32, r64 [SSE]
//
func (self *Program) CVTTSS2SI(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("CVTTSS2SI", 2, Operands { v0, v1 })
    // CVTTSS2SI xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTSS2SI m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // CVTTSS2SI xmm, r64
    if isXMM(v0) && isReg64(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // CVTTSS2SI m32, r64
    if isM32(v0) && isReg64(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for CVTTSS2SI")
    }
    return p
}

// CWTD performs "Convert Word to Doubleword".
//
// Mnemonic : CWD
// Supported forms : (1 form)
//
// * CWTD
//
func (self *Program) CWTD() *Instruction {
    p := self.alloc("CWTD", 0, Operands { })
    // CWTD
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x66)
        m.emit(0x99)
    })
    return p
}

// CWTL performs "Convert Word to Doubleword".
//
// Mnemonic : CWDE
// Supported forms : (1 form)
//
// * CWTL
//
func (self *Program) CWTL() *Instruction {
    p := self.alloc("CWTL", 0, Operands { })
    // CWTL
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x98)
    })
    return p
}

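// Usage sketch (illustrative): CWTD and CWTL take no operands because they
// work on implicit registers -- CWTD (CWD) sign-extends AX into DX:AX,
// typically to set up a 16-bit signed divide, while CWTL (CWDE) sign-extends
// AX into EAX:
//
//     p.CWTD()      // DX:AX = sign-extend(AX)
//     p.IDIVW(CX)   // divide DX:AX by CX (CX is an assumed register constant)
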
// DECB performs "Decrement by 1".
//
// Mnemonic : DEC
// Supported forms : (2 forms)
//
// * DECB r8
// * DECB m8
//
func (self *Program) DECB(v0 interface{}) *Instruction {
    p := self.alloc("DECB", 1, Operands { v0 })
    // DECB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xfe)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // DECB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xfe)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DECB")
    }
    return p
}

// DECL performs "Decrement by 1".
//
// Mnemonic : DEC
// Supported forms : (2 forms)
//
// * DECL r32
// * DECL m32
//
func (self *Program) DECL(v0 interface{}) *Instruction {
    p := self.alloc("DECL", 1, Operands { v0 })
    // DECL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xff)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // DECL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xff)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DECL")
    }
    return p
}

// DECQ performs "Decrement by 1".
//
// Mnemonic : DEC
// Supported forms : (2 forms)
//
// * DECQ r64
// * DECQ m64
//
func (self *Program) DECQ(v0 interface{}) *Instruction {
    p := self.alloc("DECQ", 1, Operands { v0 })
    // DECQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xff)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // DECQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xff)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DECQ")
    }
    return p
}

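// Usage sketch (illustrative): DEC sets the arithmetic flags like a subtract
// of 1 but leaves CF unchanged, so it is safe inside carry-propagating code.
// Ptr below is a hypothetical memory-operand constructor, named here only for
// the sketch:
//
//     p.DECQ(RCX)           // register form: REX.W + FF /1
//     p.DECQ(Ptr(RSI, 8))   // m64 form at RSI+8, if Ptr builds such operands
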
// DECW performs "Decrement by 1".
//
// Mnemonic : DEC
// Supported forms : (2 forms)
//
// * DECW r16
// * DECW m16
//
func (self *Program) DECW(v0 interface{}) *Instruction {
    p := self.alloc("DECW", 1, Operands { v0 })
    // DECW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xff)
            m.emit(0xc8 | lcode(v[0]))
        })
    }
    // DECW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xff)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DECW")
    }
    return p
}

// DIVB performs "Unsigned Divide".
//
// Mnemonic : DIV
// Supported forms : (2 forms)
//
// * DIVB r8
// * DIVB m8
//
func (self *Program) DIVB(v0 interface{}) *Instruction {
    p := self.alloc("DIVB", 1, Operands { v0 })
    // DIVB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xf6)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // DIVB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf6)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVB")
    }
    return p
}

// DIVL performs "Unsigned Divide".
//
// Mnemonic : DIV
// Supported forms : (2 forms)
//
// * DIVL r32
// * DIVL m32
//
func (self *Program) DIVL(v0 interface{}) *Instruction {
    p := self.alloc("DIVL", 1, Operands { v0 })
    // DIVL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // DIVL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVL")
    }
    return p
}

// DIVPD performs "Divide Packed Double-Precision Floating-Point Values".
//
// Mnemonic : DIVPD
// Supported forms : (2 forms)
//
// * DIVPD xmm, xmm [SSE2]
// * DIVPD m128, xmm [SSE2]
//
func (self *Program) DIVPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("DIVPD", 2, Operands { v0, v1 })
    // DIVPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // DIVPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVPD")
    }
    return p
}

// DIVPS performs "Divide Packed Single-Precision Floating-Point Values".
//
// Mnemonic : DIVPS
// Supported forms : (2 forms)
//
// * DIVPS xmm, xmm [SSE]
// * DIVPS m128, xmm [SSE]
//
func (self *Program) DIVPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("DIVPS", 2, Operands { v0, v1 })
    // DIVPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // DIVPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVPS")
    }
    return p
}

// DIVQ performs "Unsigned Divide".
//
// Mnemonic : DIV
// Supported forms : (2 forms)
//
// * DIVQ r64
// * DIVQ m64
//
func (self *Program) DIVQ(v0 interface{}) *Instruction {
    p := self.alloc("DIVQ", 1, Operands { v0 })
    // DIVQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xf7)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // DIVQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xf7)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVQ")
    }
    return p
}

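// Usage sketch (illustrative): the one-operand divides use the implicit
// double-width dividend in RDX:RAX, leaving the quotient in RAX and the
// remainder in RDX, so RDX is normally zeroed first for unsigned division:
//
//     p.XORQ(RDX, RDX)   // XORQ is assumed to be generated elsewhere in this file
//     p.DIVQ(RCX)        // RAX = RDX:RAX / RCX, RDX = RDX:RAX % RCX
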
// DIVSD performs "Divide Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : DIVSD
// Supported forms : (2 forms)
//
// * DIVSD xmm, xmm [SSE2]
// * DIVSD m64, xmm [SSE2]
//
func (self *Program) DIVSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("DIVSD", 2, Operands { v0, v1 })
    // DIVSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // DIVSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVSD")
    }
    return p
}

// DIVSS performs "Divide Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : DIVSS
// Supported forms : (2 forms)
//
// * DIVSS xmm, xmm [SSE]
// * DIVSS m32, xmm [SSE]
//
func (self *Program) DIVSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("DIVSS", 2, Operands { v0, v1 })
    // DIVSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // DIVSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVSS")
    }
    return p
}

// DIVW performs "Unsigned Divide".
//
// Mnemonic : DIV
// Supported forms : (2 forms)
//
// * DIVW r16
// * DIVW m16
//
func (self *Program) DIVW(v0 interface{}) *Instruction {
    p := self.alloc("DIVW", 1, Operands { v0 })
    // DIVW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xf0 | lcode(v[0]))
        })
    }
    // DIVW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(6, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for DIVW")
    }
    return p
}

// DPPD performs "Dot Product of Packed Double Precision Floating-Point Values".
//
// Mnemonic : DPPD
// Supported forms : (2 forms)
//
// * DPPD imm8, xmm, xmm [SSE4.1]
// * DPPD imm8, m128, xmm [SSE4.1]
//
func (self *Program) DPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("DPPD", 3, Operands { v0, v1, v2 })
    // DPPD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // DPPD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x41)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for DPPD")
    }
    return p
}

// DPPS performs "Dot Product of Packed Single Precision Floating-Point Values".
//
// Mnemonic : DPPS
// Supported forms : (2 forms)
//
// * DPPS imm8, xmm, xmm [SSE4.1]
// * DPPS imm8, m128, xmm [SSE4.1]
//
func (self *Program) DPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("DPPS", 3, Operands { v0, v1, v2 })
    // DPPS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // DPPS imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for DPPS")
    }
    return p
}

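// Usage sketch (illustrative): the imm8 of DPPD/DPPS packs two masks -- the
// high nibble selects which source lanes enter the multiply, the low nibble
// selects which destination lanes receive the summed result (DPPD uses only
// two bits of each). A full four-lane dot product broadcast into lane 0:
//
//     p.DPPS(0xf1, XMM1, XMM0)   // XMM0[0] = dot(XMM0, XMM1); lanes 1-3 zeroed
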
// EMMS performs "Exit MMX State".
//
// Mnemonic : EMMS
// Supported forms : (1 form)
//
// * EMMS [MMX]
//
func (self *Program) EMMS() *Instruction {
    p := self.alloc("EMMS", 0, Operands { })
    // EMMS
    self.require(ISA_MMX)
    p.domain = DomainMMXSSE
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x77)
    })
    return p
}

// EXTRACTPS performs "Extract Packed Single Precision Floating-Point Value".
//
// Mnemonic : EXTRACTPS
// Supported forms : (2 forms)
//
// * EXTRACTPS imm8, xmm, r32 [SSE4.1]
// * EXTRACTPS imm8, xmm, m32 [SSE4.1]
//
func (self *Program) EXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("EXTRACTPS", 3, Operands { v0, v1, v2 })
    // EXTRACTPS imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x17)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // EXTRACTPS imm8, xmm, m32
    if isImm8(v0) && isXMM(v1) && isM32(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x17)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for EXTRACTPS")
    }
    return p
}

// EXTRQ performs "Extract Field".
//
// Mnemonic : EXTRQ
// Supported forms : (2 forms)
//
// * EXTRQ xmm, xmm [SSE4A]
// * EXTRQ imm8, imm8, xmm [SSE4A]
//
func (self *Program) EXTRQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("EXTRQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("EXTRQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction EXTRQ takes 2 or 3 operands")
    }
    // EXTRQ xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4A)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // EXTRQ imm8, imm8, xmm
    if len(vv) == 1 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) {
        self.require(ISA_SSE4A)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[2], false)
            m.emit(0x0f)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]))
            m.imm1(toImmAny(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for EXTRQ")
    }
    return p
}

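// Usage sketch (illustrative): EXTRQ is one of the variadic entry points,
// dispatching on len(vv) above -- two operands select the register form,
// three select the immediate form with two imm8 bit-field descriptors:
//
//     p.EXTRQ(XMM1, XMM0)         // two-operand register form
//     p.EXTRQ(0x04, 0x08, XMM0)   // three-operand form; both leading values are imm8
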
// FEMMS performs "Fast Exit Multimedia State".
//
// Mnemonic : FEMMS
// Supported forms : (1 form)
//
// * FEMMS [FEMMS]
//
func (self *Program) FEMMS() *Instruction {
    p := self.alloc("FEMMS", 0, Operands { })
    // FEMMS
    self.require(ISA_FEMMS)
    p.domain = DomainAMDSpecific
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x0e)
    })
    return p
}

// HADDPD performs "Packed Double-FP Horizontal Add".
//
// Mnemonic : HADDPD
// Supported forms : (2 forms)
//
// * HADDPD xmm, xmm [SSE3]
// * HADDPD m128, xmm [SSE3]
//
func (self *Program) HADDPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("HADDPD", 2, Operands { v0, v1 })
    // HADDPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // HADDPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x7c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for HADDPD")
    }
    return p
}

// HADDPS performs "Packed Single-FP Horizontal Add".
//
// Mnemonic : HADDPS
// Supported forms : (2 forms)
//
// * HADDPS xmm, xmm [SSE3]
// * HADDPS m128, xmm [SSE3]
//
func (self *Program) HADDPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("HADDPS", 2, Operands { v0, v1 })
    // HADDPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // HADDPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x7c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for HADDPS")
    }
    return p
}

// HSUBPD performs "Packed Double-FP Horizontal Subtract".
//
// Mnemonic : HSUBPD
// Supported forms : (2 forms)
//
// * HSUBPD xmm, xmm [SSE3]
// * HSUBPD m128, xmm [SSE3]
//
func (self *Program) HSUBPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("HSUBPD", 2, Operands { v0, v1 })
    // HSUBPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // HSUBPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x7d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for HSUBPD")
    }
    return p
}

// HSUBPS performs "Packed Single-FP Horizontal Subtract".
//
// Mnemonic : HSUBPS
// Supported forms : (2 forms)
//
// * HSUBPS xmm, xmm [SSE3]
// * HSUBPS m128, xmm [SSE3]
//
func (self *Program) HSUBPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("HSUBPS", 2, Operands { v0, v1 })
    // HSUBPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // HSUBPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x7d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for HSUBPS")
    }
    return p
}

// IDIVB performs "Signed Divide".
//
// Mnemonic : IDIV
// Supported forms : (2 forms)
//
// * IDIVB r8
// * IDIVB m8
//
func (self *Program) IDIVB(v0 interface{}) *Instruction {
    p := self.alloc("IDIVB", 1, Operands { v0 })
    // IDIVB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xf6)
            m.emit(0xf8 | lcode(v[0]))
        })
    }
    // IDIVB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf6)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for IDIVB")
    }
    return p
}

// IDIVL performs "Signed Divide".
//
// Mnemonic : IDIV
// Supported forms : (2 forms)
//
// * IDIVL r32
// * IDIVL m32
//
func (self *Program) IDIVL(v0 interface{}) *Instruction {
    p := self.alloc("IDIVL", 1, Operands { v0 })
    // IDIVL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xf8 | lcode(v[0]))
        })
    }
    // IDIVL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for IDIVL")
    }
    return p
}

// IDIVQ performs "Signed Divide".
//
// Mnemonic : IDIV
// Supported forms : (2 forms)
//
// * IDIVQ r64
// * IDIVQ m64
//
func (self *Program) IDIVQ(v0 interface{}) *Instruction {
    p := self.alloc("IDIVQ", 1, Operands { v0 })
    // IDIVQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xf7)
            m.emit(0xf8 | lcode(v[0]))
        })
    }
    // IDIVQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xf7)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for IDIVQ")
    }
    return p
}

// IDIVW performs "Signed Divide".
//
// Mnemonic : IDIV
// Supported forms : (2 forms)
//
// * IDIVW r16
// * IDIVW m16
//
func (self *Program) IDIVW(v0 interface{}) *Instruction {
    p := self.alloc("IDIVW", 1, Operands { v0 })
    // IDIVW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xf8 | lcode(v[0]))
        })
    }
    // IDIVW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for IDIVW")
    }
    return p
}

// IMULB performs "Signed Multiply".
//
// Mnemonic : IMUL
// Supported forms : (2 forms)
//
// * IMULB r8
// * IMULB m8
//
func (self *Program) IMULB(v0 interface{}) *Instruction {
    p := self.alloc("IMULB", 1, Operands { v0 })
    // IMULB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xf6)
            m.emit(0xe8 | lcode(v[0]))
        })
    }
    // IMULB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf6)
            m.mrsd(5, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for IMULB")
    }
    return p
}

// IMULL performs "Signed Multiply".
//
// Mnemonic : IMUL
// Supported forms : (8 forms)
//
// * IMULL r32
// * IMULL m32
// * IMULL r32, r32
// * IMULL m32, r32
// * IMULL imm8, r32, r32
// * IMULL imm32, r32, r32
// * IMULL imm8, m32, r32
// * IMULL imm32, m32, r32
//
func (self *Program) IMULL(v0 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("IMULL", 1, Operands { v0 })
        case 1 : p = self.alloc("IMULL", 2, Operands { v0, vv[0] })
        case 2 : p = self.alloc("IMULL", 3, Operands { v0, vv[0], vv[1] })
        default : panic("instruction IMULL takes 1 or 2 or 3 operands")
    }
    // IMULL r32
    if len(vv) == 0 && isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xe8 | lcode(v[0]))
        })
    }
    // IMULL m32
    if len(vv) == 0 && isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(5, addr(v[0]), 1)
        })
    }
    // IMULL r32, r32
    if len(vv) == 1 && isReg32(v0) && isReg32(vv[0]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // IMULL m32, r32
    if len(vv) == 1 && isM32(v0) && isReg32(vv[0]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xaf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // IMULL imm8, r32, r32
    if len(vv) == 2 && isImm8(v0) && isReg32(vv[0]) && isReg32(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // IMULL imm32, r32, r32
    if len(vv) == 2 && isImm32(v0) && isReg32(vv[0]) && isReg32(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // IMULL imm8, m32, r32
    if len(vv) == 2 && isImm8(v0) && isM32(vv[0]) && isReg32(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // IMULL imm32, m32, r32
    if len(vv) == 2 && isImm32(v0) && isM32(vv[0]) && isReg32(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for IMULL")
    }
    return p
}

// IMULQ performs "Signed Multiply".
//
// Mnemonic : IMUL
// Supported forms : (8 forms)
//
// * IMULQ r64
// * IMULQ m64
// * IMULQ r64, r64
// * IMULQ m64, r64
// * IMULQ imm8, r64, r64
// * IMULQ imm32, r64, r64
// * IMULQ imm8, m64, r64
// * IMULQ imm32, m64, r64
//
func (self *Program) IMULQ(v0 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("IMULQ", 1, Operands { v0 })
        case 1 : p = self.alloc("IMULQ", 2, Operands { v0, vv[0] })
        case 2 : p = self.alloc("IMULQ", 3, Operands { v0, vv[0], vv[1] })
        default : panic("instruction IMULQ takes 1 or 2 or 3 operands")
    }
    // IMULQ r64
    if len(vv) == 0 && isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xf7)
            m.emit(0xe8 | lcode(v[0]))
        })
    }
    // IMULQ m64
    if len(vv) == 0 && isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xf7)
            m.mrsd(5, addr(v[0]), 1)
        })
    }
    // IMULQ r64, r64
    if len(vv) == 1 && isReg64(v0) && isReg64(vv[0]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // IMULQ m64, r64
    if len(vv) == 1 && isM64(v0) && isReg64(vv[0]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xaf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // IMULQ imm8, r64, r64
    if len(vv) == 2 && isImm8(v0) && isReg64(vv[0]) && isReg64(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1]))
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // IMULQ imm32, r64, r64
    if len(vv) == 2 && isImm32(v0) && isReg64(vv[0]) && isReg64(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1]))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // IMULQ imm8, m64, r64
    if len(vv) == 2 && isImm8(v0) && isM64(vv[0]) && isReg64(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[2]), addr(v[1]))
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // IMULQ imm32, m64, r64
    if len(vv) == 2 && isImm32(v0) && isM64(vv[0]) && isReg64(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[2]), addr(v[1]))
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for IMULQ")
    }
    return p
}

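// Usage sketch (illustrative): the three IMULQ arities map onto the classic
// IMUL encodings -- one operand widens into RDX:RAX, two operands multiply
// into the destination register, and three operands multiply a source by an
// immediate:
//
//     p.IMULQ(RCX)            // RDX:RAX = RAX * RCX
//     p.IMULQ(RCX, RAX)       // RAX *= RCX
//     p.IMULQ(10, RCX, RAX)   // RAX = RCX * 10 (imm8 form, opcode 0x6b)
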
// IMULW performs "Signed Multiply".
//
// Mnemonic : IMUL
// Supported forms : (8 forms)
//
// * IMULW r16
// * IMULW m16
// * IMULW r16, r16
// * IMULW m16, r16
// * IMULW imm8, r16, r16
// * IMULW imm16, r16, r16
// * IMULW imm8, m16, r16
// * IMULW imm16, m16, r16
//
func (self *Program) IMULW(v0 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("IMULW", 1, Operands { v0 })
        case 1 : p = self.alloc("IMULW", 2, Operands { v0, vv[0] })
        case 2 : p = self.alloc("IMULW", 3, Operands { v0, vv[0], vv[1] })
        default : panic("instruction IMULW takes 1 or 2 or 3 operands")
    }
    // IMULW r16
    if len(vv) == 0 && isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xe8 | lcode(v[0]))
        })
    }
    // IMULW m16
    if len(vv) == 0 && isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(5, addr(v[0]), 1)
        })
    }
    // IMULW r16, r16
    if len(vv) == 1 && isReg16(v0) && isReg16(vv[0]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // IMULW m16, r16
    if len(vv) == 1 && isM16(v0) && isReg16(vv[0]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xaf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // IMULW imm8, r16, r16
    if len(vv) == 2 && isImm8(v0) && isReg16(vv[0]) && isReg16(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // IMULW imm16, r16, r16
    if len(vv) == 2 && isImm16(v0) && isReg16(vv[0]) && isReg16(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // IMULW imm8, m16, r16
    if len(vv) == 2 && isImm8(v0) && isM16(vv[0]) && isReg16(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // IMULW imm16, m16, r16
    if len(vv) == 2 && isImm16(v0) && isM16(vv[0]) && isReg16(vv[1]) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for IMULW")
    }
    return p
}

// INCB performs "Increment by 1".
//
// Mnemonic : INC
// Supported forms : (2 forms)
//
// * INCB r8
// * INCB m8
//
func (self *Program) INCB(v0 interface{}) *Instruction {
    p := self.alloc("INCB", 1, Operands { v0 })
    // INCB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // INCB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xfe)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for INCB")
    }
    return p
}

// INCL performs "Increment by 1".
//
// Mnemonic : INC
// Supported forms : (2 forms)
//
// * INCL r32
// * INCL m32
//
func (self *Program) INCL(v0 interface{}) *Instruction {
    p := self.alloc("INCL", 1, Operands { v0 })
    // INCL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xff)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // INCL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xff)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for INCL")
    }
    return p
}

// INCQ performs "Increment by 1".
//
// Mnemonic : INC
// Supported forms : (2 forms)
//
// * INCQ r64
// * INCQ m64
//
func (self *Program) INCQ(v0 interface{}) *Instruction {
    p := self.alloc("INCQ", 1, Operands { v0 })
    // INCQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xff)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // INCQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xff)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for INCQ")
    }
    return p
}

// INCW performs "Increment by 1".
//
// Mnemonic : INC
// Supported forms : (2 forms)
//
// * INCW r16
// * INCW m16
//
func (self *Program) INCW(v0 interface{}) *Instruction {
    p := self.alloc("INCW", 1, Operands { v0 })
    // INCW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xff)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // INCW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xff)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for INCW")
    }
    return p
}

// INSERTPS performs "Insert Packed Single Precision Floating-Point Value".
//
// Mnemonic : INSERTPS
// Supported forms : (2 forms)
//
// * INSERTPS imm8, xmm, xmm [SSE4.1]
// * INSERTPS imm8, m32, xmm [SSE4.1]
//
func (self *Program) INSERTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("INSERTPS", 3, Operands { v0, v1, v2 })
    // INSERTPS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // INSERTPS imm8, m32, xmm
    if isImm8(v0) && isM32(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x21)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for INSERTPS")
    }
    return p
}

// INSERTQ performs "Insert Field".
//
// Mnemonic : INSERTQ
// Supported forms : (2 forms)
//
// * INSERTQ xmm, xmm [SSE4A]
// * INSERTQ imm8, imm8, xmm, xmm [SSE4A]
//
func (self *Program) INSERTQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("INSERTQ", 2, Operands { v0, v1 })
        case 2 : p = self.alloc("INSERTQ", 4, Operands { v0, v1, vv[0], vv[1] })
        default : panic("instruction INSERTQ takes 2 or 4 operands")
    }
    // INSERTQ xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4A)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // INSERTQ imm8, imm8, xmm, xmm
    if len(vv) == 2 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) && isXMM(vv[1]) {
        self.require(ISA_SSE4A)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[3]), v[2], false)
            m.emit(0x0f)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for INSERTQ")
    }
    return p
}

// INT performs "Call to Interrupt Procedure".
//
// Mnemonic : INT
// Supported forms : (2 forms)
//
// * INT 3
// * INT imm8
//
func (self *Program) INT(v0 interface{}) *Instruction {
    p := self.alloc("INT", 1, Operands { v0 })
    // INT 3
    if isConst3(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xcc)
        })
    }
    // INT imm8
    if isImm8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xcd)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for INT")
    }
    return p
}

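// Usage sketch (illustrative): the constant 3 is special-cased by isConst3
// so that breakpoints assemble to the single-byte 0xcc form instead of the
// generic two-byte 0xcd encoding:
//
//     p.INT(3)      // emits 0xcc (debugger breakpoint)
//     p.INT(0x80)   // emits 0xcd 0x80
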
// JA performs "Jump if above (CF == 0 and ZF == 0)".
//
// Mnemonic : JA
// Supported forms : (2 forms)
//
// * JA rel8
// * JA rel32
//
func (self *Program) JA(v0 interface{}) *Instruction {
    p := self.alloc("JA", 1, Operands { v0 })
    p.branch = _B_conditional
    // JA rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x77)
            m.imm1(relv(v[0]))
        })
    }
    // JA rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x87)
            m.imm4(relv(v[0]))
        })
    }
    // JA label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x77)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x87)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JA")
    }
    return p
}

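// Usage sketch (illustrative): besides fixed rel8/rel32 displacements, the
// conditional jumps accept label operands; the _F_rel1/_F_rel4 encodings
// registered above let the assembler choose the short or near form once the
// label's distance is known. Assuming this package's label facility:
//
//     done := CreateLabel("done")   // CreateLabel is an assumed constructor name
//     p.JA(done)
//     // ...
//     p.Link(done)                  // label-placement API is likewise assumed
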
// JAE performs "Jump if above or equal (CF == 0)".
//
// Mnemonic : JAE
// Supported forms : (2 forms)
//
// * JAE rel8
// * JAE rel32
//
func (self *Program) JAE(v0 interface{}) *Instruction {
    p := self.alloc("JAE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JAE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x73)
            m.imm1(relv(v[0]))
        })
    }
    // JAE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x83)
            m.imm4(relv(v[0]))
        })
    }
    // JAE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x73)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x83)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JAE")
    }
    return p
}

// JB performs "Jump if below (CF == 1)".
//
// Mnemonic : JB
// Supported forms : (2 forms)
//
// * JB rel8
// * JB rel32
//
func (self *Program) JB(v0 interface{}) *Instruction {
    p := self.alloc("JB", 1, Operands { v0 })
    p.branch = _B_conditional
    // JB rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x72)
            m.imm1(relv(v[0]))
        })
    }
    // JB rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x82)
            m.imm4(relv(v[0]))
        })
    }
    // JB label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x72)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x82)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JB")
    }
    return p
}

// JBE performs "Jump if below or equal (CF == 1 or ZF == 1)".
//
// Mnemonic : JBE
// Supported forms : (2 forms)
//
// * JBE rel8
// * JBE rel32
//
func (self *Program) JBE(v0 interface{}) *Instruction {
    p := self.alloc("JBE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JBE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x76)
            m.imm1(relv(v[0]))
        })
    }
    // JBE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x86)
            m.imm4(relv(v[0]))
        })
    }
    // JBE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x76)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x86)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JBE")
    }
    return p
}

// JC performs "Jump if carry (CF == 1)".
//
// Mnemonic : JC
// Supported forms : (2 forms)
//
// * JC rel8
// * JC rel32
//
func (self *Program) JC(v0 interface{}) *Instruction {
    p := self.alloc("JC", 1, Operands { v0 })
    p.branch = _B_conditional
    // JC rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x72)
            m.imm1(relv(v[0]))
        })
    }
    // JC rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x82)
            m.imm4(relv(v[0]))
        })
    }
    // JC label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x72)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x82)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JC")
    }
    return p
}

// JE performs "Jump if equal (ZF == 1)".
//
// Mnemonic : JE
// Supported forms : (2 forms)
//
// * JE rel8
// * JE rel32
//
func (self *Program) JE(v0 interface{}) *Instruction {
    p := self.alloc("JE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x74)
            m.imm1(relv(v[0]))
        })
    }
    // JE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x84)
            m.imm4(relv(v[0]))
        })
    }
    // JE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x74)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x84)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JE")
    }
    return p
}

// JECXZ performs "Jump if ECX register is 0".
//
// Mnemonic : JECXZ
// Supported forms : (1 form)
//
// * JECXZ rel8
//
func (self *Program) JECXZ(v0 interface{}) *Instruction {
    p := self.alloc("JECXZ", 1, Operands { v0 })
    p.branch = _B_conditional
    // JECXZ rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xe3)
            m.imm1(relv(v[0]))
        })
    }
    // JECXZ label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0xe3)
            m.imm1(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JECXZ")
    }
    return p
}
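// Note (explanatory, not generated code): unlike the other conditional jumps,
// JECXZ (and JRCXZ below) exists only in a rel8 form — there is no
// 0x0F-prefixed rel32 variant — so its label case registers just the _F_rel1
// encoding. A branch on ECX/RCX == 0 to a target further than ±127 bytes has
// to be synthesized, e.g. by testing the register and using JZ, or by
// pairing a short JECXZ with a nearby unconditional JMP to the far target.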
// JG performs "Jump if greater (ZF == 0 and SF == OF)".
//
// Mnemonic : JG
// Supported forms : (2 forms)
//
// * JG rel8
// * JG rel32
//
func (self *Program) JG(v0 interface{}) *Instruction {
    p := self.alloc("JG", 1, Operands { v0 })
    p.branch = _B_conditional
    // JG rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7f)
            m.imm1(relv(v[0]))
        })
    }
    // JG rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8f)
            m.imm4(relv(v[0]))
        })
    }
    // JG label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7f)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8f)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JG")
    }
    return p
}

// JGE performs "Jump if greater or equal (SF == OF)".
//
// Mnemonic : JGE
// Supported forms : (2 forms)
//
// * JGE rel8
// * JGE rel32
//
func (self *Program) JGE(v0 interface{}) *Instruction {
    p := self.alloc("JGE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JGE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7d)
            m.imm1(relv(v[0]))
        })
    }
    // JGE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8d)
            m.imm4(relv(v[0]))
        })
    }
    // JGE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7d)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8d)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JGE")
    }
    return p
}

// JL performs "Jump if less (SF != OF)".
//
// Mnemonic : JL
// Supported forms : (2 forms)
//
// * JL rel8
// * JL rel32
//
func (self *Program) JL(v0 interface{}) *Instruction {
    p := self.alloc("JL", 1, Operands { v0 })
    p.branch = _B_conditional
    // JL rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7c)
            m.imm1(relv(v[0]))
        })
    }
    // JL rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8c)
            m.imm4(relv(v[0]))
        })
    }
    // JL label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7c)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8c)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JL")
    }
    return p
}

// JLE performs "Jump if less or equal (ZF == 1 or SF != OF)".
//
// Mnemonic : JLE
// Supported forms : (2 forms)
//
// * JLE rel8
// * JLE rel32
//
func (self *Program) JLE(v0 interface{}) *Instruction {
    p := self.alloc("JLE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JLE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7e)
            m.imm1(relv(v[0]))
        })
    }
    // JLE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8e)
            m.imm4(relv(v[0]))
        })
    }
    // JLE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7e)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8e)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JLE")
    }
    return p
}

// JMP performs "Jump Unconditionally".
//
// Mnemonic : JMP
// Supported forms : (2 forms)
//
// * JMP rel8
// * JMP rel32
//
func (self *Program) JMP(v0 interface{}) *Instruction {
    p := self.alloc("JMP", 1, Operands { v0 })
    p.branch = _B_unconditional
    // JMP rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xeb)
            m.imm1(relv(v[0]))
        })
    }
    // JMP rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xe9)
            m.imm4(relv(v[0]))
        })
    }
    // JMP label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0xeb)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0xe9)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JMP")
    }
    return p
}

// JMPQ performs "Jump Unconditionally".
//
// Mnemonic : JMP
// Supported forms : (2 forms)
//
// * JMPQ r64
// * JMPQ m64
//
func (self *Program) JMPQ(v0 interface{}) *Instruction {
    p := self.alloc("JMPQ", 1, Operands { v0 })
    // JMPQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xff)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // JMPQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xff)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for JMPQ")
    }
    return p
}
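// Note (explanatory, not generated code): JMPQ is the indirect form of JMP,
// opcode 0xFF with ModRM opcode extension /4. For a register target the
// ModRM byte is 0xE0 | reg (register-direct addressing); for a memory target,
// mrsd(4, ...) places the constant 4 in the reg field as the extension. In
// 64-bit mode a near indirect jump always uses a 64-bit target, so no REX.W
// is needed — rexo only contributes the extension bit for r8-r15 here.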
// JNA performs "Jump if not above (CF == 1 or ZF == 1)".
//
// Mnemonic : JNA
// Supported forms : (2 forms)
//
// * JNA rel8
// * JNA rel32
//
func (self *Program) JNA(v0 interface{}) *Instruction {
    p := self.alloc("JNA", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNA rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x76)
            m.imm1(relv(v[0]))
        })
    }
    // JNA rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x86)
            m.imm4(relv(v[0]))
        })
    }
    // JNA label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x76)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x86)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNA")
    }
    return p
}

// JNAE performs "Jump if not above or equal (CF == 1)".
//
// Mnemonic : JNAE
// Supported forms : (2 forms)
//
// * JNAE rel8
// * JNAE rel32
//
func (self *Program) JNAE(v0 interface{}) *Instruction {
    p := self.alloc("JNAE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNAE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x72)
            m.imm1(relv(v[0]))
        })
    }
    // JNAE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x82)
            m.imm4(relv(v[0]))
        })
    }
    // JNAE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x72)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x82)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNAE")
    }
    return p
}

// JNB performs "Jump if not below (CF == 0)".
//
// Mnemonic : JNB
// Supported forms : (2 forms)
//
// * JNB rel8
// * JNB rel32
//
func (self *Program) JNB(v0 interface{}) *Instruction {
    p := self.alloc("JNB", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNB rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x73)
            m.imm1(relv(v[0]))
        })
    }
    // JNB rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x83)
            m.imm4(relv(v[0]))
        })
    }
    // JNB label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x73)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x83)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNB")
    }
    return p
}

// JNBE performs "Jump if not below or equal (CF == 0 and ZF == 0)".
//
// Mnemonic : JNBE
// Supported forms : (2 forms)
//
// * JNBE rel8
// * JNBE rel32
//
func (self *Program) JNBE(v0 interface{}) *Instruction {
    p := self.alloc("JNBE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNBE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x77)
            m.imm1(relv(v[0]))
        })
    }
    // JNBE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x87)
            m.imm4(relv(v[0]))
        })
    }
    // JNBE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x77)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x87)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNBE")
    }
    return p
}

// JNC performs "Jump if not carry (CF == 0)".
//
// Mnemonic : JNC
// Supported forms : (2 forms)
//
// * JNC rel8
// * JNC rel32
//
func (self *Program) JNC(v0 interface{}) *Instruction {
    p := self.alloc("JNC", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNC rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x73)
            m.imm1(relv(v[0]))
        })
    }
    // JNC rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x83)
            m.imm4(relv(v[0]))
        })
    }
    // JNC label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x73)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x83)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNC")
    }
    return p
}

// JNE performs "Jump if not equal (ZF == 0)".
//
// Mnemonic : JNE
// Supported forms : (2 forms)
//
// * JNE rel8
// * JNE rel32
//
func (self *Program) JNE(v0 interface{}) *Instruction {
    p := self.alloc("JNE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x75)
            m.imm1(relv(v[0]))
        })
    }
    // JNE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x85)
            m.imm4(relv(v[0]))
        })
    }
    // JNE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x75)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x85)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNE")
    }
    return p
}

// JNG performs "Jump if not greater (ZF == 1 or SF != OF)".
//
// Mnemonic : JNG
// Supported forms : (2 forms)
//
// * JNG rel8
// * JNG rel32
//
func (self *Program) JNG(v0 interface{}) *Instruction {
    p := self.alloc("JNG", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNG rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7e)
            m.imm1(relv(v[0]))
        })
    }
    // JNG rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8e)
            m.imm4(relv(v[0]))
        })
    }
    // JNG label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7e)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8e)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNG")
    }
    return p
}

// JNGE performs "Jump if not greater or equal (SF != OF)".
//
// Mnemonic : JNGE
// Supported forms : (2 forms)
//
// * JNGE rel8
// * JNGE rel32
//
func (self *Program) JNGE(v0 interface{}) *Instruction {
    p := self.alloc("JNGE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNGE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7c)
            m.imm1(relv(v[0]))
        })
    }
    // JNGE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8c)
            m.imm4(relv(v[0]))
        })
    }
    // JNGE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7c)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8c)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNGE")
    }
    return p
}

// JNL performs "Jump if not less (SF == OF)".
//
// Mnemonic : JNL
// Supported forms : (2 forms)
//
// * JNL rel8
// * JNL rel32
//
func (self *Program) JNL(v0 interface{}) *Instruction {
    p := self.alloc("JNL", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNL rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7d)
            m.imm1(relv(v[0]))
        })
    }
    // JNL rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8d)
            m.imm4(relv(v[0]))
        })
    }
    // JNL label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7d)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8d)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNL")
    }
    return p
}

// JNLE performs "Jump if not less or equal (ZF == 0 and SF == OF)".
//
// Mnemonic : JNLE
// Supported forms : (2 forms)
//
// * JNLE rel8
// * JNLE rel32
//
func (self *Program) JNLE(v0 interface{}) *Instruction {
    p := self.alloc("JNLE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNLE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7f)
            m.imm1(relv(v[0]))
        })
    }
    // JNLE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8f)
            m.imm4(relv(v[0]))
        })
    }
    // JNLE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7f)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8f)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNLE")
    }
    return p
}

// JNO performs "Jump if not overflow (OF == 0)".
//
// Mnemonic : JNO
// Supported forms : (2 forms)
//
// * JNO rel8
// * JNO rel32
//
func (self *Program) JNO(v0 interface{}) *Instruction {
    p := self.alloc("JNO", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNO rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x71)
            m.imm1(relv(v[0]))
        })
    }
    // JNO rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x81)
            m.imm4(relv(v[0]))
        })
    }
    // JNO label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x71)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x81)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNO")
    }
    return p
}

// JNP performs "Jump if not parity (PF == 0)".
//
// Mnemonic : JNP
// Supported forms : (2 forms)
//
// * JNP rel8
// * JNP rel32
//
func (self *Program) JNP(v0 interface{}) *Instruction {
    p := self.alloc("JNP", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNP rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7b)
            m.imm1(relv(v[0]))
        })
    }
    // JNP rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8b)
            m.imm4(relv(v[0]))
        })
    }
    // JNP label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7b)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8b)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNP")
    }
    return p
}

// JNS performs "Jump if not sign (SF == 0)".
//
// Mnemonic : JNS
// Supported forms : (2 forms)
//
// * JNS rel8
// * JNS rel32
//
func (self *Program) JNS(v0 interface{}) *Instruction {
    p := self.alloc("JNS", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNS rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x79)
            m.imm1(relv(v[0]))
        })
    }
    // JNS rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x89)
            m.imm4(relv(v[0]))
        })
    }
    // JNS label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x79)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x89)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNS")
    }
    return p
}

// JNZ performs "Jump if not zero (ZF == 0)".
//
// Mnemonic : JNZ
// Supported forms : (2 forms)
//
// * JNZ rel8
// * JNZ rel32
//
func (self *Program) JNZ(v0 interface{}) *Instruction {
    p := self.alloc("JNZ", 1, Operands { v0 })
    p.branch = _B_conditional
    // JNZ rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x75)
            m.imm1(relv(v[0]))
        })
    }
    // JNZ rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x85)
            m.imm4(relv(v[0]))
        })
    }
    // JNZ label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x75)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x85)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JNZ")
    }
    return p
}

// JO performs "Jump if overflow (OF == 1)".
//
// Mnemonic : JO
// Supported forms : (2 forms)
//
// * JO rel8
// * JO rel32
//
func (self *Program) JO(v0 interface{}) *Instruction {
    p := self.alloc("JO", 1, Operands { v0 })
    p.branch = _B_conditional
    // JO rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x70)
            m.imm1(relv(v[0]))
        })
    }
    // JO rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x80)
            m.imm4(relv(v[0]))
        })
    }
    // JO label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x70)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x80)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JO")
    }
    return p
}

// JP performs "Jump if parity (PF == 1)".
//
// Mnemonic : JP
// Supported forms : (2 forms)
//
// * JP rel8
// * JP rel32
//
func (self *Program) JP(v0 interface{}) *Instruction {
    p := self.alloc("JP", 1, Operands { v0 })
    p.branch = _B_conditional
    // JP rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7a)
            m.imm1(relv(v[0]))
        })
    }
    // JP rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8a)
            m.imm4(relv(v[0]))
        })
    }
    // JP label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7a)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8a)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JP")
    }
    return p
}

// JPE performs "Jump if parity even (PF == 1)".
//
// Mnemonic : JPE
// Supported forms : (2 forms)
//
// * JPE rel8
// * JPE rel32
//
func (self *Program) JPE(v0 interface{}) *Instruction {
    p := self.alloc("JPE", 1, Operands { v0 })
    p.branch = _B_conditional
    // JPE rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7a)
            m.imm1(relv(v[0]))
        })
    }
    // JPE rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8a)
            m.imm4(relv(v[0]))
        })
    }
    // JPE label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7a)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8a)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JPE")
    }
    return p
}

// JPO performs "Jump if parity odd (PF == 0)".
//
// Mnemonic : JPO
// Supported forms : (2 forms)
//
// * JPO rel8
// * JPO rel32
//
func (self *Program) JPO(v0 interface{}) *Instruction {
    p := self.alloc("JPO", 1, Operands { v0 })
    p.branch = _B_conditional
    // JPO rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x7b)
            m.imm1(relv(v[0]))
        })
    }
    // JPO rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8b)
            m.imm4(relv(v[0]))
        })
    }
    // JPO label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x7b)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x8b)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JPO")
    }
    return p
}

// JRCXZ performs "Jump if RCX register is 0".
//
// Mnemonic : JRCXZ
// Supported forms : (1 form)
//
// * JRCXZ rel8
//
func (self *Program) JRCXZ(v0 interface{}) *Instruction {
    p := self.alloc("JRCXZ", 1, Operands { v0 })
    p.branch = _B_conditional
    // JRCXZ rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xe3)
            m.imm1(relv(v[0]))
        })
    }
    // JRCXZ label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0xe3)
            m.imm1(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JRCXZ")
    }
    return p
}

// JS performs "Jump if sign (SF == 1)".
//
// Mnemonic : JS
// Supported forms : (2 forms)
//
// * JS rel8
// * JS rel32
//
func (self *Program) JS(v0 interface{}) *Instruction {
    p := self.alloc("JS", 1, Operands { v0 })
    p.branch = _B_conditional
    // JS rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x78)
            m.imm1(relv(v[0]))
        })
    }
    // JS rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x88)
            m.imm4(relv(v[0]))
        })
    }
    // JS label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x78)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x88)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JS")
    }
    return p
}

// JZ performs "Jump if zero (ZF == 1)".
//
// Mnemonic : JZ
// Supported forms : (2 forms)
//
// * JZ rel8
// * JZ rel32
//
func (self *Program) JZ(v0 interface{}) *Instruction {
    p := self.alloc("JZ", 1, Operands { v0 })
    p.branch = _B_conditional
    // JZ rel8
    if isRel8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x74)
            m.imm1(relv(v[0]))
        })
    }
    // JZ rel32
    if isRel32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x84)
            m.imm4(relv(v[0]))
        })
    }
    // JZ label
    if isLabel(v0) {
        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
            m.emit(0x74)
            m.imm1(relv(v[0]))
        })
        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
            m.emit(0x0f)
            m.emit(0x84)
            m.imm4(relv(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for JZ")
    }
    return p
}
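// Note (explanatory, not generated code): many of the mnemonics above are
// pure aliases that assemble to identical bytes — JZ/JE (0x74), JNZ/JNE
// (0x75), JC/JB/JNAE (0x72), JNC/JNB/JAE (0x73), JNA/JBE (0x76), JNBE/JA
// (0x77), JP/JPE (0x7A), JNP/JPO (0x7B), JNG/JLE (0x7E), JNGE/JL (0x7C),
// JNL/JGE (0x7D), JNLE/JG (0x7F). Separate methods are generated purely for
// source readability; the CPU sees the same condition code either way.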
// KADDB performs "ADD Two 8-bit Masks".
//
// Mnemonic : KADDB
// Supported forms : (1 form)
//
// * KADDB k, k, k [AVX512DQ]
//
func (self *Program) KADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KADDB", 3, Operands { v0, v1, v2 })
    // KADDB k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KADDB")
    }
    return p
}

// KADDD performs "ADD Two 32-bit Masks".
//
// Mnemonic : KADDD
// Supported forms : (1 form)
//
// * KADDD k, k, k [AVX512BW]
//
func (self *Program) KADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KADDD", 3, Operands { v0, v1, v2 })
    // KADDD k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KADDD")
    }
    return p
}

// KADDQ performs "ADD Two 64-bit Masks".
//
// Mnemonic : KADDQ
// Supported forms : (1 form)
//
// * KADDQ k, k, k [AVX512BW]
//
func (self *Program) KADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KADDQ", 3, Operands { v0, v1, v2 })
    // KADDQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KADDQ")
    }
    return p
}

// KADDW performs "ADD Two 16-bit Masks".
//
// Mnemonic : KADDW
// Supported forms : (1 form)
//
// * KADDW k, k, k [AVX512DQ]
//
func (self *Program) KADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KADDW", 3, Operands { v0, v1, v2 })
    // KADDW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KADDW")
    }
    return p
}
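// Note (explanatory, not generated code): the opmask instructions (KADD*,
// KAND*, KMOV*, ...) are VEX-encoded. Where the required fields fit, the
// encoder uses the compact two-byte VEX prefix via m.vex2 (KADDB/KADDW
// above); the 32- and 64-bit variants need VEX.W=1, which only exists in the
// three-byte form, so those emit the raw 0xC4 0xE1 ... sequence and fold the
// second source register into the third byte's (inverted) vvvv field by
// XORing hlcode(v[1]) << 3 into it.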
// KANDB performs "Bitwise Logical AND 8-bit Masks".
//
// Mnemonic : KANDB
// Supported forms : (1 form)
//
// * KANDB k, k, k [AVX512DQ]
//
func (self *Program) KANDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDB", 3, Operands { v0, v1, v2 })
    // KANDB k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDB")
    }
    return p
}

// KANDD performs "Bitwise Logical AND 32-bit Masks".
//
// Mnemonic : KANDD
// Supported forms : (1 form)
//
// * KANDD k, k, k [AVX512BW]
//
func (self *Program) KANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDD", 3, Operands { v0, v1, v2 })
    // KANDD k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDD")
    }
    return p
}

// KANDNB performs "Bitwise Logical AND NOT 8-bit Masks".
//
// Mnemonic : KANDNB
// Supported forms : (1 form)
//
// * KANDNB k, k, k [AVX512DQ]
//
func (self *Program) KANDNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDNB", 3, Operands { v0, v1, v2 })
    // KANDNB k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDNB")
    }
    return p
}

// KANDND performs "Bitwise Logical AND NOT 32-bit Masks".
//
// Mnemonic : KANDND
// Supported forms : (1 form)
//
// * KANDND k, k, k [AVX512BW]
//
func (self *Program) KANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDND", 3, Operands { v0, v1, v2 })
    // KANDND k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDND")
    }
    return p
}

// KANDNQ performs "Bitwise Logical AND NOT 64-bit Masks".
//
// Mnemonic : KANDNQ
// Supported forms : (1 form)
//
// * KANDNQ k, k, k [AVX512BW]
//
func (self *Program) KANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDNQ", 3, Operands { v0, v1, v2 })
    // KANDNQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDNQ")
    }
    return p
}

// KANDNW performs "Bitwise Logical AND NOT 16-bit Masks".
//
// Mnemonic : KANDNW
// Supported forms : (1 form)
//
// * KANDNW k, k, k [AVX512F]
//
func (self *Program) KANDNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDNW", 3, Operands { v0, v1, v2 })
    // KANDNW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDNW")
    }
    return p
}

// KANDQ performs "Bitwise Logical AND 64-bit Masks".
//
// Mnemonic : KANDQ
// Supported forms : (1 form)
//
// * KANDQ k, k, k [AVX512BW]
//
func (self *Program) KANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDQ", 3, Operands { v0, v1, v2 })
    // KANDQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDQ")
    }
    return p
}

// KANDW performs "Bitwise Logical AND 16-bit Masks".
//
// Mnemonic : KANDW
// Supported forms : (1 form)
//
// * KANDW k, k, k [AVX512F]
//
func (self *Program) KANDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KANDW", 3, Operands { v0, v1, v2 })
    // KANDW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KANDW")
    }
    return p
}

// KMOVB performs "Move 8-bit Mask".
//
// Mnemonic : KMOVB
// Supported forms : (5 forms)
//
// * KMOVB k, k [AVX512DQ]
// * KMOVB r32, k [AVX512DQ]
// * KMOVB m8, k [AVX512DQ]
// * KMOVB k, r32 [AVX512DQ]
// * KMOVB k, m8 [AVX512DQ]
//
func (self *Program) KMOVB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KMOVB", 2, Operands { v0, v1 })
    // KMOVB k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, nil, 0)
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVB r32, k
    if isReg32(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[0], 0)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVB m8, k
    if isM8(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, addr(v[0]), 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // KMOVB k, r32
    if isK(v0) && isReg32(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), nil, 0)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVB k, m8
    if isK(v0) && isM8(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, addr(v[1]), 0)
            m.emit(0x91)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for KMOVB")
    }
    return p
}

// KMOVD performs "Move 32-bit Mask".
//
// Mnemonic : KMOVD
// Supported forms : (5 forms)
//
// * KMOVD k, k [AVX512BW]
// * KMOVD r32, k [AVX512BW]
// * KMOVD m32, k [AVX512BW]
// * KMOVD k, r32 [AVX512BW]
// * KMOVD k, m32 [AVX512BW]
//
func (self *Program) KMOVD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KMOVD", 2, Operands { v0, v1 })
    // KMOVD k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf9)
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVD r32, k
    if isReg32(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, 0, v[0], 0)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVD m32, k
    if isM32(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x81, 0, addr(v[0]), 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // KMOVD k, r32
    if isK(v0) && isReg32(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), nil, 0)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVD k, m32
    if isK(v0) && isM32(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x81, 0, addr(v[1]), 0)
            m.emit(0x91)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for KMOVD")
    }
    return p
}

// KMOVQ performs "Move 64-bit Mask".
//
// Mnemonic : KMOVQ
// Supported forms : (5 forms)
//
// * KMOVQ k, k [AVX512BW]
// * KMOVQ r64, k [AVX512BW]
// * KMOVQ m64, k [AVX512BW]
// * KMOVQ k, r64 [AVX512BW]
// * KMOVQ k, m64 [AVX512BW]
//
func (self *Program) KMOVQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KMOVQ", 2, Operands { v0, v1 })
    // KMOVQ k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf8)
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVQ r64, k
    if isReg64(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[0]) << 5))
            m.emit(0xfb)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVQ m64, k
    if isM64(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x80, 0, addr(v[0]), 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // KMOVQ k, r64
    if isK(v0) && isReg64(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[1]) << 7))
            m.emit(0xfb)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVQ k, m64
    if isK(v0) && isM64(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x80, 0, addr(v[1]), 0)
            m.emit(0x91)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for KMOVQ")
    }
    return p
}

// KMOVW performs "Move 16-bit Mask".
//
// Mnemonic : KMOVW
// Supported forms : (5 forms)
//
// * KMOVW k, k [AVX512F]
// * KMOVW r32, k [AVX512F]
// * KMOVW m16, k [AVX512F]
// * KMOVW k, r32 [AVX512F]
// * KMOVW k, m16 [AVX512F]
//
func (self *Program) KMOVW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KMOVW", 2, Operands { v0, v1 })
    // KMOVW k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, nil, 0)
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVW r32, k
    if isReg32(v0) && isK(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, v[0], 0)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVW m16, k
    if isM16(v0) && isK(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, addr(v[0]), 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // KMOVW k, r32
    if isK(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), nil, 0)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // KMOVW k, m16
    if isK(v0) && isM16(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, addr(v[1]), 0)
            m.emit(0x91)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for KMOVW")
    }
    return p
}
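// Illustrative usage sketch (an assumption-laden example, not generated
// code): a common AVX-512 pattern is to materialize a mask in a GPR, move it
// into a k register, and read it back after masked operations. Assuming this
// package's usual register constants (EAX, ECX, K1):
//
//     p.KMOVW(EAX, K1)    // copy the low 16 bits of eax into mask k1
//     // ... masked vector operations predicated on k1 ...
//     p.KMOVW(K1, ECX)    // copy the (possibly updated) mask back into ecx
//
// Operands follow this file's AT&T-style source-first order throughout.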
// KNOTB performs "NOT 8-bit Mask Register".
//
// Mnemonic : KNOTB
// Supported forms : (1 form)
//
// * KNOTB k, k [AVX512DQ]
//
func (self *Program) KNOTB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KNOTB", 2, Operands { v0, v1 })
    // KNOTB k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, nil, 0)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KNOTB")
    }
    return p
}

// KNOTD performs "NOT 32-bit Mask Register".
//
// Mnemonic : KNOTD
// Supported forms : (1 form)
//
// * KNOTD k, k [AVX512BW]
//
func (self *Program) KNOTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KNOTD", 2, Operands { v0, v1 })
    // KNOTD k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf9)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KNOTD")
    }
    return p
}

// KNOTQ performs "NOT 64-bit Mask Register".
//
// Mnemonic : KNOTQ
// Supported forms : (1 form)
//
// * KNOTQ k, k [AVX512BW]
//
func (self *Program) KNOTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KNOTQ", 2, Operands { v0, v1 })
    // KNOTQ k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf8)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KNOTQ")
    }
    return p
}

// KNOTW performs "NOT 16-bit Mask Register".
//
// Mnemonic : KNOTW
// Supported forms : (1 form)
//
// * KNOTW k, k [AVX512F]
//
func (self *Program) KNOTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KNOTW", 2, Operands { v0, v1 })
    // KNOTW k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, nil, 0)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KNOTW")
    }
    return p
}

// KORB performs "Bitwise Logical OR 8-bit Masks".
//
// Mnemonic : KORB
// Supported forms : (1 form)
//
// * KORB k, k, k [AVX512DQ]
//
func (self *Program) KORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KORB", 3, Operands { v0, v1, v2 })
    // KORB k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORB")
    }
    return p
}

// KORD performs "Bitwise Logical OR 32-bit Masks".
//
// Mnemonic : KORD
// Supported forms : (1 form)
//
// * KORD k, k, k [AVX512BW]
//
func (self *Program) KORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KORD", 3, Operands { v0, v1, v2 })
    // KORD k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORD")
    }
    return p
}

// KORQ performs "Bitwise Logical OR 64-bit Masks".
//
// Mnemonic : KORQ
// Supported forms : (1 form)
//
// * KORQ k, k, k [AVX512BW]
//
func (self *Program) KORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KORQ", 3, Operands { v0, v1, v2 })
    // KORQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORQ")
    }
    return p
}

// KORTESTB performs "OR 8-bit Masks and Set Flags".
//
// Mnemonic : KORTESTB
// Supported forms : (1 form)
//
// * KORTESTB k, k [AVX512DQ]
//
func (self *Program) KORTESTB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KORTESTB", 2, Operands { v0, v1 })
    // KORTESTB k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, nil, 0)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORTESTB")
    }
    return p
}

// KORTESTD performs "OR 32-bit Masks and Set Flags".
//
// Mnemonic : KORTESTD
// Supported forms : (1 form)
//
// * KORTESTD k, k [AVX512BW]
//
func (self *Program) KORTESTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KORTESTD", 2, Operands { v0, v1 })
    // KORTESTD k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf9)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORTESTD")
    }
    return p
}

// KORTESTQ performs "OR 64-bit Masks and Set Flags".
//
// Mnemonic : KORTESTQ
// Supported forms : (1 form)
//
// * KORTESTQ k, k [AVX512BW]
//
func (self *Program) KORTESTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KORTESTQ", 2, Operands { v0, v1 })
    // KORTESTQ k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf8)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORTESTQ")
    }
    return p
}

// KORTESTW performs "OR 16-bit Masks and Set Flags".
//
// Mnemonic : KORTESTW
// Supported forms : (1 form)
//
// * KORTESTW k, k [AVX512F]
//
func (self *Program) KORTESTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KORTESTW", 2, Operands { v0, v1 })
    // KORTESTW k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, nil, 0)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORTESTW")
    }
    return p
}
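// Note (explanatory, not generated code): KORTEST is the usual way to branch
// on mask contents — it ORs the two masks, sets ZF when the result is all
// zeroes and CF when the result is all ones for the operand width, and leaves
// both masks unchanged. A typical loop exit is KORTESTW k, k followed by JZ
// once no lanes remain active.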
// KORW performs "Bitwise Logical OR 16-bit Masks".
//
// Mnemonic : KORW
// Supported forms : (1 form)
//
// * KORW k, k, k [AVX512F]
//
func (self *Program) KORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KORW", 3, Operands { v0, v1, v2 })
    // KORW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KORW")
    }
    return p
}

// KSHIFTLB performs "Shift Left 8-bit Masks".
//
// Mnemonic : KSHIFTLB
// Supported forms : (1 form)
//
// * KSHIFTLB imm8, k, k [AVX512DQ]
//
func (self *Program) KSHIFTLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KSHIFTLB", 3, Operands { v0, v1, v2 })
    // KSHIFTLB imm8, k, k
    if isImm8(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3)
            m.emit(0x79)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KSHIFTLB")
    }
    return p
}

// KSHIFTLD performs "Shift Left 32-bit Masks".
//
// Mnemonic : KSHIFTLD
// Supported forms : (1 form)
//
// * KSHIFTLD imm8, k, k [AVX512BW]
//
func (self *Program) KSHIFTLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KSHIFTLD", 3, Operands { v0, v1, v2 })
    // KSHIFTLD imm8, k, k
    if isImm8(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3)
            m.emit(0x79)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KSHIFTLD")
    }
    return p
}

// KSHIFTLQ performs "Shift Left 64-bit Masks".
//
// Mnemonic : KSHIFTLQ
// Supported forms : (1 form)
//
// * KSHIFTLQ imm8, k, k [AVX512BW]
//
func (self *Program) KSHIFTLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KSHIFTLQ", 3, Operands { v0, v1, v2 })
    // KSHIFTLQ imm8, k, k
    if isImm8(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3)
            m.emit(0xf9)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KSHIFTLQ")
    }
    return p
}

// KSHIFTLW performs "Shift Left 16-bit Masks".
//
// Mnemonic : KSHIFTLW
// Supported forms : (1 form)
//
// * KSHIFTLW imm8, k, k [AVX512F]
//
func (self *Program) KSHIFTLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KSHIFTLW", 3, Operands { v0, v1, v2 })
    // KSHIFTLW imm8, k, k
    if isImm8(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3)
            m.emit(0xf9)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KSHIFTLW")
    }
    return p
}
// KSHIFTRB performs "Shift Right 8-bit Masks".
|
|
//
|
|
// Mnemonic : KSHIFTRB
|
|
// Supported forms : (1 form)
|
|
//
|
|
// * KSHIFTRB imm8, k, k [AVX512DQ]
|
|
//
|
|
func (self *Program) KSHIFTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("KSHIFTRB", 3, Operands { v0, v1, v2 })
|
|
// KSHIFTRB imm8, k, k
|
|
if isImm8(v0) && isK(v1) && isK(v2) {
|
|
self.require(ISA_AVX512DQ)
|
|
p.domain = DomainMask
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3)
|
|
m.emit(0x79)
|
|
m.emit(0x30)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for KSHIFTRB")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// KSHIFTRD performs "Shift Right 32-bit Masks".
|
|
//
|
|
// Mnemonic : KSHIFTRD
|
|
// Supported forms : (1 form)
|
|
//
|
|
// * KSHIFTRD imm8, k, k [AVX512BW]
|
|
//
|
|
func (self *Program) KSHIFTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("KSHIFTRD", 3, Operands { v0, v1, v2 })
|
|
// KSHIFTRD imm8, k, k
|
|
if isImm8(v0) && isK(v1) && isK(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainMask
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3)
|
|
m.emit(0x79)
|
|
m.emit(0x31)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for KSHIFTRD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// KSHIFTRQ performs "Shift Right 64-bit Masks".
|
|
//
|
|
// Mnemonic : KSHIFTRQ
|
|
// Supported forms : (1 form)
|
|
//
|
|
// * KSHIFTRQ imm8, k, k [AVX512BW]
|
|
//
|
|
func (self *Program) KSHIFTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("KSHIFTRQ", 3, Operands { v0, v1, v2 })
|
|
// KSHIFTRQ imm8, k, k
|
|
if isImm8(v0) && isK(v1) && isK(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainMask
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3)
|
|
m.emit(0xf9)
|
|
m.emit(0x31)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for KSHIFTRQ")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// KSHIFTRW performs "Shift Right 16-bit Masks".
|
|
//
|
|
// Mnemonic : KSHIFTRW
|
|
// Supported forms : (1 form)
|
|
//
|
|
// * KSHIFTRW imm8, k, k [AVX512F]
|
|
//
|
|
func (self *Program) KSHIFTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("KSHIFTRW", 3, Operands { v0, v1, v2 })
|
|
// KSHIFTRW imm8, k, k
|
|
if isImm8(v0) && isK(v1) && isK(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainMask
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3)
|
|
m.emit(0xf9)
|
|
m.emit(0x30)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for KSHIFTRW")
|
|
}
|
|
return p
|
|
}
|
|
|
|
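// Usage sketch (illustrative, not part of the generated output): the mask
// shifts take the shift count as an 8-bit immediate, so extracting the
// upper half of a 16-bit mask could be encoded as below (K0/K1 assumed to
// name this package's mask-register operands).
//
//     p.KSHIFTRW(8, K1, K0)    // k0 = k1 >> 8
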
// KTESTB performs "Bit Test 8-bit Masks and Set Flags".
//
// Mnemonic : KTESTB
// Supported forms : (1 form)
//
// * KTESTB k, k [AVX512DQ]
//
func (self *Program) KTESTB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KTESTB", 2, Operands { v0, v1 })
    // KTESTB k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, nil, 0)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KTESTB")
    }
    return p
}

// KTESTD performs "Bit Test 32-bit Masks and Set Flags".
//
// Mnemonic : KTESTD
// Supported forms : (1 form)
//
// * KTESTD k, k [AVX512BW]
//
func (self *Program) KTESTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KTESTD", 2, Operands { v0, v1 })
    // KTESTD k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf9)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KTESTD")
    }
    return p
}

// KTESTQ performs "Bit Test 64-bit Masks and Set Flags".
//
// Mnemonic : KTESTQ
// Supported forms : (1 form)
//
// * KTESTQ k, k [AVX512BW]
//
func (self *Program) KTESTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KTESTQ", 2, Operands { v0, v1 })
    // KTESTQ k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xf8)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KTESTQ")
    }
    return p
}

// KTESTW performs "Bit Test 16-bit Masks and Set Flags".
//
// Mnemonic : KTESTW
// Supported forms : (1 form)
//
// * KTESTW k, k [AVX512DQ]
//
func (self *Program) KTESTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("KTESTW", 2, Operands { v0, v1 })
    // KTESTW k, k
    if isK(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, nil, 0)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KTESTW")
    }
    return p
}

// KUNPCKBW performs "Unpack and Interleave 8-bit Masks".
//
// Mnemonic : KUNPCKBW
// Supported forms : (1 form)
//
// * KUNPCKBW k, k, k [AVX512F]
//
func (self *Program) KUNPCKBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KUNPCKBW", 3, Operands { v0, v1, v2 })
    // KUNPCKBW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KUNPCKBW")
    }
    return p
}

// KUNPCKDQ performs "Unpack and Interleave 32-bit Masks".
//
// Mnemonic : KUNPCKDQ
// Supported forms : (1 form)
//
// * KUNPCKDQ k, k, k [AVX512BW]
//
func (self *Program) KUNPCKDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KUNPCKDQ", 3, Operands { v0, v1, v2 })
    // KUNPCKDQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KUNPCKDQ")
    }
    return p
}

// KUNPCKWD performs "Unpack and Interleave 16-bit Masks".
//
// Mnemonic : KUNPCKWD
// Supported forms : (1 form)
//
// * KUNPCKWD k, k, k [AVX512BW]
//
func (self *Program) KUNPCKWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KUNPCKWD", 3, Operands { v0, v1, v2 })
    // KUNPCKWD k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KUNPCKWD")
    }
    return p
}

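// Usage sketch (illustrative, not part of the generated output): KUNPCKBW
// concatenates the low bytes of two 8-bit masks into one 16-bit mask,
// which is handy after two independent byte-lane compares (K0..K2 assumed
// to name this package's mask-register operands).
//
//     p.KUNPCKBW(K1, K2, K0)    // k0 = (k2[7:0] << 8) | k1[7:0]
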
// KXNORB performs "Bitwise Logical XNOR 8-bit Masks".
//
// Mnemonic : KXNORB
// Supported forms : (1 form)
//
// * KXNORB k, k, k [AVX512DQ]
//
func (self *Program) KXNORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXNORB", 3, Operands { v0, v1, v2 })
    // KXNORB k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXNORB")
    }
    return p
}

// KXNORD performs "Bitwise Logical XNOR 32-bit Masks".
//
// Mnemonic : KXNORD
// Supported forms : (1 form)
//
// * KXNORD k, k, k [AVX512BW]
//
func (self *Program) KXNORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXNORD", 3, Operands { v0, v1, v2 })
    // KXNORD k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXNORD")
    }
    return p
}

// KXNORQ performs "Bitwise Logical XNOR 64-bit Masks".
//
// Mnemonic : KXNORQ
// Supported forms : (1 form)
//
// * KXNORQ k, k, k [AVX512BW]
//
func (self *Program) KXNORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXNORQ", 3, Operands { v0, v1, v2 })
    // KXNORQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXNORQ")
    }
    return p
}

// KXNORW performs "Bitwise Logical XNOR 16-bit Masks".
//
// Mnemonic : KXNORW
// Supported forms : (1 form)
//
// * KXNORW k, k, k [AVX512F]
//
func (self *Program) KXNORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXNORW", 3, Operands { v0, v1, v2 })
    // KXNORW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXNORW")
    }
    return p
}

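// Usage sketch (illustrative, not part of the generated output): XNOR of a
// mask with itself yields all ones, so the common "enable every lane"
// idiom can be encoded as below (K0 assumed to name this package's
// mask-register operand).
//
//     p.KXNORW(K0, K0, K0)    // k0 = ^(k0 ^ k0) = 0xffff
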
// KXORB performs "Bitwise Logical XOR 8-bit Masks".
//
// Mnemonic : KXORB
// Supported forms : (1 form)
//
// * KXORB k, k, k [AVX512DQ]
//
func (self *Program) KXORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXORB", 3, Operands { v0, v1, v2 })
    // KXORB k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, nil, hlcode(v[1]))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXORB")
    }
    return p
}

// KXORD performs "Bitwise Logical XOR 32-bit Masks".
//
// Mnemonic : KXORD
// Supported forms : (1 form)
//
// * KXORD k, k, k [AVX512BW]
//
func (self *Program) KXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXORD", 3, Operands { v0, v1, v2 })
    // KXORD k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXORD")
    }
    return p
}

// KXORQ performs "Bitwise Logical XOR 64-bit Masks".
//
// Mnemonic : KXORQ
// Supported forms : (1 form)
//
// * KXORQ k, k, k [AVX512BW]
//
func (self *Program) KXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXORQ", 3, Operands { v0, v1, v2 })
    // KXORQ k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1)
            m.emit(0xfc ^ (hlcode(v[1]) << 3))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXORQ")
    }
    return p
}

// KXORW performs "Bitwise Logical XOR 16-bit Masks".
//
// Mnemonic : KXORW
// Supported forms : (1 form)
//
// * KXORW k, k, k [AVX512F]
//
func (self *Program) KXORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("KXORW", 3, Operands { v0, v1, v2 })
    // KXORW k, k, k
    if isK(v0) && isK(v1) && isK(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainMask
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, 0, nil, hlcode(v[1]))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for KXORW")
    }
    return p
}

// LDDQU performs "Load Unaligned Integer 128 Bits".
//
// Mnemonic : LDDQU
// Supported forms : (1 form)
//
// * LDDQU m128, xmm [SSE3]
//
func (self *Program) LDDQU(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LDDQU", 2, Operands { v0, v1 })
    // LDDQU m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LDDQU")
    }
    return p
}

// LDMXCSR performs "Load MXCSR Register".
//
// Mnemonic : LDMXCSR
// Supported forms : (1 form)
//
// * LDMXCSR m32 [SSE]
//
func (self *Program) LDMXCSR(v0 interface{}) *Instruction {
    p := self.alloc("LDMXCSR", 1, Operands { v0 })
    // LDMXCSR m32
    if isM32(v0) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xae)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LDMXCSR")
    }
    return p
}

// LEAL performs "Load Effective Address".
//
// Mnemonic : LEA
// Supported forms : (1 form)
//
// * LEAL m, r32
//
func (self *Program) LEAL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LEAL", 2, Operands { v0, v1 })
    // LEAL m, r32
    if isM(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x8d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LEAL")
    }
    return p
}

// LEAQ performs "Load Effective Address".
//
// Mnemonic : LEA
// Supported forms : (1 form)
//
// * LEAQ m, r64
//
func (self *Program) LEAQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LEAQ", 2, Operands { v0, v1 })
    // LEAQ m, r64
    if isM(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x8d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LEAQ")
    }
    return p
}

// LEAW performs "Load Effective Address".
//
// Mnemonic : LEA
// Supported forms : (1 form)
//
// * LEAW m, r16
//
func (self *Program) LEAW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LEAW", 2, Operands { v0, v1 })
    // LEAW m, r16
    if isM(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x8d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LEAW")
    }
    return p
}

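// Usage sketch (illustrative, not part of the generated output): LEA
// computes an address without touching memory, so it doubles as
// shift-and-add arithmetic. The Sib(base, index, scale, disp) constructor
// and the RDI/RSI/RAX names below are assumptions about this package's
// operand helpers, not something this generated file defines.
//
//     p.LEAQ(Sib(RDI, RSI, 4, 8), RAX)    // rax = rdi + rsi*4 + 8
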
// LFENCE performs "Load Fence".
//
// Mnemonic : LFENCE
// Supported forms : (1 form)
//
// * LFENCE [SSE2]
//
func (self *Program) LFENCE() *Instruction {
    p := self.alloc("LFENCE", 0, Operands { })
    // LFENCE
    self.require(ISA_SSE2)
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0xae)
        m.emit(0xe8)
    })
    return p
}

// LZCNTL performs "Count the Number of Leading Zero Bits".
//
// Mnemonic : LZCNT
// Supported forms : (2 forms)
//
// * LZCNTL r32, r32 [LZCNT]
// * LZCNTL m32, r32 [LZCNT]
//
func (self *Program) LZCNTL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LZCNTL", 2, Operands { v0, v1 })
    // LZCNTL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_LZCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // LZCNTL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_LZCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LZCNTL")
    }
    return p
}

// LZCNTQ performs "Count the Number of Leading Zero Bits".
//
// Mnemonic : LZCNT
// Supported forms : (2 forms)
//
// * LZCNTQ r64, r64 [LZCNT]
// * LZCNTQ m64, r64 [LZCNT]
//
func (self *Program) LZCNTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LZCNTQ", 2, Operands { v0, v1 })
    // LZCNTQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_LZCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // LZCNTQ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_LZCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xbd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LZCNTQ")
    }
    return p
}

// LZCNTW performs "Count the Number of Leading Zero Bits".
//
// Mnemonic : LZCNT
// Supported forms : (2 forms)
//
// * LZCNTW r16, r16 [LZCNT]
// * LZCNTW m16, r16 [LZCNT]
//
func (self *Program) LZCNTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("LZCNTW", 2, Operands { v0, v1 })
    // LZCNTW r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_LZCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // LZCNTW m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_LZCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for LZCNTW")
    }
    return p
}

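// Usage sketch (illustrative, not part of the generated output): LZCNT
// writes the number of leading zero bits of the source into the
// destination register; RDI and RAX are assumed to name this package's
// 64-bit register operands.
//
//     p.LZCNTQ(RDI, RAX)    // rax = number of leading zero bits in rdi
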
// MASKMOVDQU performs "Store Selected Bytes of Double Quadword".
//
// Mnemonic : MASKMOVDQU
// Supported forms : (1 form)
//
// * MASKMOVDQU xmm, xmm [SSE2]
//
func (self *Program) MASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MASKMOVDQU", 2, Operands { v0, v1 })
    // MASKMOVDQU xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MASKMOVDQU")
    }
    return p
}

// MASKMOVQ performs "Store Selected Bytes of Quadword".
//
// Mnemonic : MASKMOVQ
// Supported forms : (1 form)
//
// * MASKMOVQ mm, mm [MMX+]
//
func (self *Program) MASKMOVQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MASKMOVQ", 2, Operands { v0, v1 })
    // MASKMOVQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MASKMOVQ")
    }
    return p
}

// MAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values".
//
// Mnemonic : MAXPD
// Supported forms : (2 forms)
//
// * MAXPD xmm, xmm [SSE2]
// * MAXPD m128, xmm [SSE2]
//
func (self *Program) MAXPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MAXPD", 2, Operands { v0, v1 })
    // MAXPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MAXPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MAXPD")
    }
    return p
}

// MAXPS performs "Return Maximum Packed Single-Precision Floating-Point Values".
//
// Mnemonic : MAXPS
// Supported forms : (2 forms)
//
// * MAXPS xmm, xmm [SSE]
// * MAXPS m128, xmm [SSE]
//
func (self *Program) MAXPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MAXPS", 2, Operands { v0, v1 })
    // MAXPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MAXPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MAXPS")
    }
    return p
}

// MAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : MAXSD
// Supported forms : (2 forms)
//
// * MAXSD xmm, xmm [SSE2]
// * MAXSD m64, xmm [SSE2]
//
func (self *Program) MAXSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MAXSD", 2, Operands { v0, v1 })
    // MAXSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MAXSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MAXSD")
    }
    return p
}

// MAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : MAXSS
// Supported forms : (2 forms)
//
// * MAXSS xmm, xmm [SSE]
// * MAXSS m32, xmm [SSE]
//
func (self *Program) MAXSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MAXSS", 2, Operands { v0, v1 })
    // MAXSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MAXSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MAXSS")
    }
    return p
}

// MFENCE performs "Memory Fence".
//
// Mnemonic : MFENCE
// Supported forms : (1 form)
//
// * MFENCE [SSE2]
//
func (self *Program) MFENCE() *Instruction {
    p := self.alloc("MFENCE", 0, Operands { })
    // MFENCE
    self.require(ISA_SSE2)
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0xae)
        m.emit(0xf0)
    })
    return p
}

// MINPD performs "Return Minimum Packed Double-Precision Floating-Point Values".
//
// Mnemonic : MINPD
// Supported forms : (2 forms)
//
// * MINPD xmm, xmm [SSE2]
// * MINPD m128, xmm [SSE2]
//
func (self *Program) MINPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MINPD", 2, Operands { v0, v1 })
    // MINPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MINPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MINPD")
    }
    return p
}

// MINPS performs "Return Minimum Packed Single-Precision Floating-Point Values".
//
// Mnemonic : MINPS
// Supported forms : (2 forms)
//
// * MINPS xmm, xmm [SSE]
// * MINPS m128, xmm [SSE]
//
func (self *Program) MINPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MINPS", 2, Operands { v0, v1 })
    // MINPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MINPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MINPS")
    }
    return p
}

// MINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : MINSD
// Supported forms : (2 forms)
//
// * MINSD xmm, xmm [SSE2]
// * MINSD m64, xmm [SSE2]
//
func (self *Program) MINSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MINSD", 2, Operands { v0, v1 })
    // MINSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MINSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MINSD")
    }
    return p
}

// MINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : MINSS
// Supported forms : (2 forms)
//
// * MINSS xmm, xmm [SSE]
// * MINSS m32, xmm [SSE]
//
func (self *Program) MINSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MINSS", 2, Operands { v0, v1 })
    // MINSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MINSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MINSS")
    }
    return p
}

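// Usage sketch (illustrative, not part of the generated output): the scalar
// min/max forms compose into the usual branchless clamp. Operands follow
// AT&T order (source first, destination last); XMM0..XMM2 are assumed to
// name this package's XMM register operands.
//
//     p.MAXSD(XMM1, XMM0)    // xmm0 = max(xmm0, lower bound in xmm1)
//     p.MINSD(XMM2, XMM0)    // xmm0 = min(xmm0, upper bound in xmm2)
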
// MONITOR performs "Monitor a Linear Address Range".
//
// Mnemonic : MONITOR
// Supported forms : (1 form)
//
// * MONITOR [MONITOR]
//
func (self *Program) MONITOR() *Instruction {
    p := self.alloc("MONITOR", 0, Operands { })
    // MONITOR
    self.require(ISA_MONITOR)
    p.domain = DomainMisc
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x01)
        m.emit(0xc8)
    })
    return p
}

// MONITORX performs "Monitor a Linear Address Range with Timeout".
//
// Mnemonic : MONITORX
// Supported forms : (1 form)
//
// * MONITORX [MONITORX]
//
func (self *Program) MONITORX() *Instruction {
    p := self.alloc("MONITORX", 0, Operands { })
    // MONITORX
    self.require(ISA_MONITORX)
    p.domain = DomainMisc
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x01)
        m.emit(0xfa)
    })
    return p
}

// MOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values".
//
// Mnemonic : MOVAPD
// Supported forms : (3 forms)
//
// * MOVAPD xmm, xmm [SSE2]
// * MOVAPD m128, xmm [SSE2]
// * MOVAPD xmm, m128 [SSE2]
//
func (self *Program) MOVAPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVAPD", 2, Operands { v0, v1 })
    // MOVAPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVAPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVAPD xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVAPD")
    }
    return p
}

// MOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values".
//
// Mnemonic : MOVAPS
// Supported forms : (3 forms)
//
// * MOVAPS xmm, xmm [SSE]
// * MOVAPS m128, xmm [SSE]
// * MOVAPS xmm, m128 [SSE]
//
func (self *Program) MOVAPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVAPS", 2, Operands { v0, v1 })
    // MOVAPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVAPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVAPS xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVAPS")
    }
    return p
}

// MOVB performs "Move".
//
// Mnemonic : MOV
// Supported forms : (5 forms)
//
// * MOVB imm8, r8
// * MOVB r8, r8
// * MOVB m8, r8
// * MOVB imm8, m8
// * MOVB r8, m8
//
func (self *Program) MOVB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVB", 2, Operands { v0, v1 })
    // MOVB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xb0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // MOVB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x8a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc6)
            m.mrsd(0, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // MOVB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x88)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVB")
    }
    return p
}

// MOVBEL performs "Move Data After Swapping Bytes".
//
// Mnemonic : MOVBE
// Supported forms : (2 forms)
//
// * MOVBEL m32, r32 [MOVBE]
// * MOVBEL r32, m32 [MOVBE]
//
func (self *Program) MOVBEL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVBEL", 2, Operands { v0, v1 })
    // MOVBEL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_MOVBE)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVBEL r32, m32
    if isReg32(v0) && isM32(v1) {
        self.require(ISA_MOVBE)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVBEL")
    }
    return p
}

// MOVBEQ performs "Move Data After Swapping Bytes".
//
// Mnemonic : MOVBE
// Supported forms : (2 forms)
//
// * MOVBEQ m64, r64 [MOVBE]
// * MOVBEQ r64, m64 [MOVBE]
//
func (self *Program) MOVBEQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVBEQ", 2, Operands { v0, v1 })
    // MOVBEQ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_MOVBE)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVBEQ r64, m64
    if isReg64(v0) && isM64(v1) {
        self.require(ISA_MOVBE)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVBEQ")
    }
    return p
}

// MOVBEW performs "Move Data After Swapping Bytes".
//
// Mnemonic : MOVBE
// Supported forms : (2 forms)
//
// * MOVBEW m16, r16 [MOVBE]
// * MOVBEW r16, m16 [MOVBE]
//
func (self *Program) MOVBEW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVBEW", 2, Operands { v0, v1 })
    // MOVBEW m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_MOVBE)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVBEW r16, m16
    if isReg16(v0) && isM16(v1) {
        self.require(ISA_MOVBE)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xf1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVBEW")
    }
    return p
}

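// Usage sketch (illustrative, not part of the generated output): MOVBE
// loads or stores with a byte swap in one instruction, e.g. reading a
// big-endian 32-bit field. Ptr(base, disp) is an assumed memory-operand
// constructor and RDI/EAX assumed register operands of this package.
//
//     p.MOVBEL(Ptr(RDI, 0), EAX)    // eax = bswap32(load32(rdi))
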
// MOVD performs "Move Doubleword".
//
// Mnemonic : MOVD
// Supported forms : (8 forms)
//
// * MOVD mm, r32 [MMX]
// * MOVD r32, mm [MMX]
// * MOVD m32, mm [MMX]
// * MOVD mm, m32 [MMX]
// * MOVD xmm, r32 [SSE2]
// * MOVD r32, xmm [SSE2]
// * MOVD m32, xmm [SSE2]
// * MOVD xmm, m32 [SSE2]
//
func (self *Program) MOVD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVD", 2, Operands { v0, v1 })
    // MOVD mm, r32
    if isMM(v0) && isReg32(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVD r32, mm
    if isReg32(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVD m32, mm
    if isM32(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVD mm, m32
    if isMM(v0) && isM32(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // MOVD xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVD r32, xmm
    if isReg32(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVD xmm, m32
    if isXMM(v0) && isM32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVD")
    }
    return p
}

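// Usage sketch (illustrative, not part of the generated output): MOVD moves
// 32 bits between general-purpose and vector registers, zero-extending
// into the XMM register on the way in; EAX and XMM0 are assumed to name
// this package's register operands.
//
//     p.MOVD(EAX, XMM0)    // xmm0[31:0] = eax, upper lanes cleared
//     p.MOVD(XMM0, EAX)    // eax = xmm0[31:0]
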
// MOVDDUP performs "Move One Double-FP and Duplicate".
//
// Mnemonic : MOVDDUP
// Supported forms : (2 forms)
//
// * MOVDDUP xmm, xmm [SSE3]
// * MOVDDUP m64, xmm [SSE3]
//
func (self *Program) MOVDDUP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVDDUP", 2, Operands { v0, v1 })
    // MOVDDUP xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVDDUP m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVDDUP")
    }
    return p
}

// MOVDQ2Q performs "Move Quadword from XMM to MMX Technology Register".
//
// Mnemonic : MOVDQ2Q
// Supported forms : (1 form)
//
// * MOVDQ2Q xmm, mm [SSE2]
//
func (self *Program) MOVDQ2Q(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVDQ2Q", 2, Operands { v0, v1 })
    // MOVDQ2Q xmm, mm
    if isXMM(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVDQ2Q")
    }
    return p
}

// MOVDQA performs "Move Aligned Double Quadword".
//
// Mnemonic : MOVDQA
// Supported forms : (3 forms)
//
// * MOVDQA xmm, xmm [SSE2]
// * MOVDQA m128, xmm [SSE2]
// * MOVDQA xmm, m128 [SSE2]
//
func (self *Program) MOVDQA(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVDQA", 2, Operands { v0, v1 })
    // MOVDQA xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVDQA m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVDQA xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVDQA")
    }
    return p
}

// MOVDQU performs "Move Unaligned Double Quadword".
//
// Mnemonic : MOVDQU
// Supported forms : (3 forms)
//
// * MOVDQU xmm, xmm [SSE2]
// * MOVDQU m128, xmm [SSE2]
// * MOVDQU xmm, m128 [SSE2]
//
func (self *Program) MOVDQU(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVDQU", 2, Operands { v0, v1 })
    // MOVDQU xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVDQU m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVDQU xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVDQU")
    }
    return p
}

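// Usage sketch (illustrative, not part of the generated output): MOVDQA
// requires 16-byte aligned addresses and faults otherwise, while MOVDQU
// accepts any alignment, so buffers of unknown alignment should go through
// MOVDQU. Ptr(base, disp) is an assumed memory-operand constructor and
// RSI/XMM0 assumed register operands of this package.
//
//     p.MOVDQU(Ptr(RSI, 0), XMM0)    // 128-bit load, safe for any alignment
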
// MOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low".
|
|
//
|
|
// Mnemonic : MOVHLPS
|
|
// Supported forms : (1 form)
|
|
//
|
|
// * MOVHLPS xmm, xmm [SSE]
|
|
//
|
|
func (self *Program) MOVHLPS(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("MOVHLPS", 2, Operands { v0, v1 })
|
|
// MOVHLPS xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x12)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for MOVHLPS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// MOVHPD performs "Move High Packed Double-Precision Floating-Point Value".
|
|
//
|
|
// Mnemonic : MOVHPD
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * MOVHPD m64, xmm [SSE2]
|
|
// * MOVHPD xmm, m64 [SSE2]
|
|
//
|
|
func (self *Program) MOVHPD(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("MOVHPD", 2, Operands { v0, v1 })
|
|
// MOVHPD m64, xmm
|
|
if isM64(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// MOVHPD xmm, m64
|
|
if isXMM(v0) && isM64(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[0]), addr(v[1]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x17)
|
|
m.mrsd(lcode(v[0]), addr(v[1]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for MOVHPD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// MOVHPS performs "Move High Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : MOVHPS
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * MOVHPS m64, xmm [SSE]
|
|
// * MOVHPS xmm, m64 [SSE]
|
|
//
|
|
func (self *Program) MOVHPS(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("MOVHPS", 2, Operands { v0, v1 })
|
|
// MOVHPS m64, xmm
|
|
if isM64(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// MOVHPS xmm, m64
|
|
if isXMM(v0) && isM64(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[0]), addr(v[1]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x17)
|
|
m.mrsd(lcode(v[0]), addr(v[1]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for MOVHPS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// MOVL performs "Move".
|
|
//
|
|
// Mnemonic : MOV
|
|
// Supported forms : (5 forms)
|
|
//
|
|
// * MOVL imm32, r32
|
|
// * MOVL r32, r32
|
|
// * MOVL m32, r32
|
|
// * MOVL imm32, m32
|
|
// * MOVL r32, m32
|
|
//
|
|
func (self *Program) MOVL(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("MOVL", 2, Operands { v0, v1 })
|
|
// MOVL imm32, r32
|
|
if isImm32(v0) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, v[1], false)
|
|
m.emit(0xc7)
|
|
m.emit(0xc0 | lcode(v[1]))
|
|
m.imm4(toImmAny(v[0]))
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, v[1], false)
|
|
m.emit(0xb8 | lcode(v[1]))
|
|
m.imm4(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// MOVL r32, r32
|
|
if isReg32(v0) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[0]), v[1], false)
|
|
m.emit(0x89)
|
|
m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x8b)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// MOVL m32, r32
|
|
if isM32(v0) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x8b)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// MOVL imm32, m32
|
|
if isImm32(v0) && isM32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, addr(v[1]), false)
|
|
m.emit(0xc7)
|
|
m.mrsd(0, addr(v[1]), 1)
|
|
m.imm4(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// MOVL r32, m32
|
|
if isReg32(v0) && isM32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[0]), addr(v[1]), false)
|
|
m.emit(0x89)
|
|
m.mrsd(lcode(v[0]), addr(v[1]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for MOVL")
|
|
}
|
|
return p
|
|
}
// MOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High".
//
// Mnemonic : MOVLHPS
// Supported forms : (1 form)
//
// * MOVLHPS xmm, xmm [SSE]
//
func (self *Program) MOVLHPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVLHPS", 2, Operands { v0, v1 })
    // MOVLHPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVLHPS")
    }
    return p
}

// MOVLPD performs "Move Low Packed Double-Precision Floating-Point Value".
//
// Mnemonic : MOVLPD
// Supported forms : (2 forms)
//
// * MOVLPD m64, xmm [SSE2]
// * MOVLPD xmm, m64 [SSE2]
//
func (self *Program) MOVLPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVLPD", 2, Operands { v0, v1 })
    // MOVLPD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVLPD xmm, m64
    if isXMM(v0) && isM64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVLPD")
    }
    return p
}

// MOVLPS performs "Move Low Packed Single-Precision Floating-Point Values".
//
// Mnemonic : MOVLPS
// Supported forms : (2 forms)
//
// * MOVLPS m64, xmm [SSE]
// * MOVLPS xmm, m64 [SSE]
//
func (self *Program) MOVLPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVLPS", 2, Operands { v0, v1 })
    // MOVLPS m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVLPS xmm, m64
    if isXMM(v0) && isM64(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVLPS")
    }
    return p
}
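// NOTE (editorial sketch, not produced by mkasm_amd64.py): MOVLPD and MOVLPS
// emit the same opcodes (0F 12 to load, 0F 13 to store); the only difference
// is the mandatory 0x66 prefix that selects the double-precision SSE2 form.
// For a load of [rax] into xmm1 (ModRM 0x08 shown purely for illustration):
//
//     MOVLPS m64, xmm  ->    0f 12 08
//     MOVLPD m64, xmm  -> 66 0f 12 08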
// MOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask".
//
// Mnemonic : MOVMSKPD
// Supported forms : (1 form)
//
// * MOVMSKPD xmm, r32 [SSE2]
//
func (self *Program) MOVMSKPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVMSKPD", 2, Operands { v0, v1 })
    // MOVMSKPD xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVMSKPD")
    }
    return p
}

// MOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask".
//
// Mnemonic : MOVMSKPS
// Supported forms : (1 form)
//
// * MOVMSKPS xmm, r32 [SSE]
//
func (self *Program) MOVMSKPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVMSKPS", 2, Operands { v0, v1 })
    // MOVMSKPS xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVMSKPS")
    }
    return p
}

// MOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint".
//
// Mnemonic : MOVNTDQ
// Supported forms : (1 form)
//
// * MOVNTDQ xmm, m128 [SSE2]
//
func (self *Program) MOVNTDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTDQ", 2, Operands { v0, v1 })
    // MOVNTDQ xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTDQ")
    }
    return p
}

// MOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint".
//
// Mnemonic : MOVNTDQA
// Supported forms : (1 form)
//
// * MOVNTDQA m128, xmm [SSE4.1]
//
func (self *Program) MOVNTDQA(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTDQA", 2, Operands { v0, v1 })
    // MOVNTDQA m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTDQA")
    }
    return p
}

// MOVNTIL performs "Store Doubleword Using Non-Temporal Hint".
//
// Mnemonic : MOVNTI
// Supported forms : (1 form)
//
// * MOVNTIL r32, m32 [SSE2]
//
func (self *Program) MOVNTIL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTIL", 2, Operands { v0, v1 })
    // MOVNTIL r32, m32
    if isReg32(v0) && isM32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTIL")
    }
    return p
}
// MOVNTIQ performs "Store Quadword Using Non-Temporal Hint".
//
// Mnemonic : MOVNTI
// Supported forms : (1 form)
//
// * MOVNTIQ r64, m64 [SSE2]
//
func (self *Program) MOVNTIQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTIQ", 2, Operands { v0, v1 })
    // MOVNTIQ r64, m64
    if isReg64(v0) && isM64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xc3)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTIQ")
    }
    return p
}
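// NOTE (editorial sketch, not produced by mkasm_amd64.py): MOVNTIL and
// MOVNTIQ differ only in operand width. The L form uses m.rexo(...), which
// emits a REX prefix only when an extended register forces one, while the
// Q form uses m.rexm(1, ...), whose leading 1 reads as the REX.W bit that
// widens the store to 64 bits. In the REX layout 0100WRXB that bit is worth
// 8, which is why the inline 64-bit encoders elsewhere in this file start
// from the constant 0x48.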
// MOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint".
//
// Mnemonic : MOVNTPD
// Supported forms : (1 form)
//
// * MOVNTPD xmm, m128 [SSE2]
//
func (self *Program) MOVNTPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTPD", 2, Operands { v0, v1 })
    // MOVNTPD xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTPD")
    }
    return p
}

// MOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint".
//
// Mnemonic : MOVNTPS
// Supported forms : (1 form)
//
// * MOVNTPS xmm, m128 [SSE]
//
func (self *Program) MOVNTPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTPS", 2, Operands { v0, v1 })
    // MOVNTPS xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTPS")
    }
    return p
}

// MOVNTQ performs "Store of Quadword Using Non-Temporal Hint".
//
// Mnemonic : MOVNTQ
// Supported forms : (1 form)
//
// * MOVNTQ mm, m64 [MMX+]
//
func (self *Program) MOVNTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTQ", 2, Operands { v0, v1 })
    // MOVNTQ mm, m64
    if isMM(v0) && isM64(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTQ")
    }
    return p
}

// MOVNTSD performs "Store Scalar Double-Precision Floating-Point Values Using Non-Temporal Hint".
//
// Mnemonic : MOVNTSD
// Supported forms : (1 form)
//
// * MOVNTSD xmm, m64 [SSE4A]
//
func (self *Program) MOVNTSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTSD", 2, Operands { v0, v1 })
    // MOVNTSD xmm, m64
    if isXMM(v0) && isM64(v1) {
        self.require(ISA_SSE4A)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTSD")
    }
    return p
}

// MOVNTSS performs "Store Scalar Single-Precision Floating-Point Values Using Non-Temporal Hint".
//
// Mnemonic : MOVNTSS
// Supported forms : (1 form)
//
// * MOVNTSS xmm, m32 [SSE4A]
//
func (self *Program) MOVNTSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVNTSS", 2, Operands { v0, v1 })
    // MOVNTSS xmm, m32
    if isXMM(v0) && isM32(v1) {
        self.require(ISA_SSE4A)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVNTSS")
    }
    return p
}

// MOVQ performs "Move".
//
// Mnemonic : MOV
// Supported forms : (16 forms)
//
// * MOVQ imm32, r64
// * MOVQ imm64, r64
// * MOVQ r64, r64
// * MOVQ m64, r64
// * MOVQ imm32, m64
// * MOVQ r64, m64
// * MOVQ mm, r64 [MMX]
// * MOVQ r64, mm [MMX]
// * MOVQ mm, mm [MMX]
// * MOVQ m64, mm [MMX]
// * MOVQ mm, m64 [MMX]
// * MOVQ xmm, r64 [SSE2]
// * MOVQ r64, xmm [SSE2]
// * MOVQ xmm, xmm [SSE2]
// * MOVQ m64, xmm [SSE2]
// * MOVQ xmm, m64 [SSE2]
//
func (self *Program) MOVQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVQ", 2, Operands { v0, v1 })
    // MOVQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xc7)
            m.emit(0xc0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // MOVQ imm64, r64
    if isImm64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xb8 | lcode(v[1]))
            m.imm8(toImmAny(v[0]))
        })
    }
    // MOVQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x89)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x8b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xc7)
            m.mrsd(0, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // MOVQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x89)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // MOVQ mm, r64
    if isMM(v0) && isReg64(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVQ r64, mm
    if isReg64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVQ mm, m64
    if isMM(v0) && isM64(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // MOVQ xmm, r64
    if isXMM(v0) && isReg64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVQ r64, xmm
    if isReg64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xd6)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x7e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVQ xmm, m64
    if isXMM(v0) && isM64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xd6)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVQ")
    }
    return p
}
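// NOTE (editorial sketch, not produced by mkasm_amd64.py): the
// register-to-register MOVQ encoders build their REX prefix inline as
// 0x48 | hcode(reg)<<2 | hcode(rm), matching the architectural layout
// 0100WRXB: 0x48 sets W (64-bit operand), bit 2 is R (extends ModRM.reg to
// r8-r15) and bit 0 is B (extends ModRM.rm). Assuming hcode() returns 1 for
// the extended registers, as the surrounding code implies:
//
//     rex := byte(0x48) | 1<<2 | 1   // 0x4d = REX.W + REX.R + REX.B
//
// so MOVQ(R8, R9) can assemble to 4d 89 c1, Intel "mov r9, r8".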
// MOVQ2DQ performs "Move Quadword from MMX Technology to XMM Register".
//
// Mnemonic : MOVQ2DQ
// Supported forms : (1 form)
//
// * MOVQ2DQ mm, xmm [SSE2]
//
func (self *Program) MOVQ2DQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVQ2DQ", 2, Operands { v0, v1 })
    // MOVQ2DQ mm, xmm
    if isMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVQ2DQ")
    }
    return p
}

// MOVSBL performs "Move with Sign-Extension".
//
// Mnemonic : MOVSX
// Supported forms : (2 forms)
//
// * MOVSBL r8, r32
// * MOVSBL m8, r32
//
func (self *Program) MOVSBL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSBL", 2, Operands { v0, v1 })
    // MOVSBL r8, r32
    if isReg8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSBL m8, r32
    if isM8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbe)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSBL")
    }
    return p
}

// MOVSBQ performs "Move with Sign-Extension".
//
// Mnemonic : MOVSX
// Supported forms : (2 forms)
//
// * MOVSBQ r8, r64
// * MOVSBQ m8, r64
//
func (self *Program) MOVSBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSBQ", 2, Operands { v0, v1 })
    // MOVSBQ r8, r64
    if isReg8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSBQ m8, r64
    if isM8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xbe)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSBQ")
    }
    return p
}

// MOVSBW performs "Move with Sign-Extension".
//
// Mnemonic : MOVSX
// Supported forms : (2 forms)
//
// * MOVSBW r8, r16
// * MOVSBW m8, r16
//
func (self *Program) MOVSBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSBW", 2, Operands { v0, v1 })
    // MOVSBW r8, r16
    if isReg8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSBW m8, r16
    if isM8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbe)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSBW")
    }
    return p
}

// MOVSD performs "Move Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : MOVSD
// Supported forms : (3 forms)
//
// * MOVSD xmm, xmm [SSE2]
// * MOVSD m64, xmm [SSE2]
// * MOVSD xmm, m64 [SSE2]
//
func (self *Program) MOVSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSD", 2, Operands { v0, v1 })
    // MOVSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVSD xmm, m64
    if isXMM(v0) && isM64(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSD")
    }
    return p
}
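// NOTE (editorial sketch, not produced by mkasm_amd64.py): the scalar and
// unaligned vector moves in this file share opcode 0F 10/11 and are told
// apart by the SSE mandatory-prefix scheme visible in the encoders:
//
//     (none) 0f 10 /r -> MOVUPS      66 0f 10 /r -> MOVUPD
//     f3     0f 10 /r -> MOVSS       f2 0f 10 /r -> MOVSD
//
// Here f2/f3 are not repeat prefixes; combined with the 0F opcode map they
// select which instruction the bytes decode to.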
// MOVSHDUP performs "Move Packed Single-FP High and Duplicate".
//
// Mnemonic : MOVSHDUP
// Supported forms : (2 forms)
//
// * MOVSHDUP xmm, xmm [SSE3]
// * MOVSHDUP m128, xmm [SSE3]
//
func (self *Program) MOVSHDUP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSHDUP", 2, Operands { v0, v1 })
    // MOVSHDUP xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSHDUP m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSHDUP")
    }
    return p
}

// MOVSLDUP performs "Move Packed Single-FP Low and Duplicate".
//
// Mnemonic : MOVSLDUP
// Supported forms : (2 forms)
//
// * MOVSLDUP xmm, xmm [SSE3]
// * MOVSLDUP m128, xmm [SSE3]
//
func (self *Program) MOVSLDUP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSLDUP", 2, Operands { v0, v1 })
    // MOVSLDUP xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSLDUP m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSLDUP")
    }
    return p
}

// MOVSLQ performs "Move Doubleword to Quadword with Sign-Extension".
//
// Mnemonic : MOVSXD
// Supported forms : (2 forms)
//
// * MOVSLQ r32, r64
// * MOVSLQ m32, r64
//
func (self *Program) MOVSLQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSLQ", 2, Operands { v0, v1 })
    // MOVSLQ r32, r64
    if isReg32(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSLQ m32, r64
    if isM32(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x63)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSLQ")
    }
    return p
}
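// NOTE (editorial sketch, not produced by mkasm_amd64.py): MOVSLQ is the
// Go-assembler name for MOVSXD (opcode 63 /r under REX.W): it sign-extends
// a 32-bit source into a 64-bit register. The data movement is equivalent
// to this plain Go:
//
//     var src32 int32 = -5
//     dst64 := int64(src32)   // sign bit replicated through bits 32..63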
// MOVSS performs "Move Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : MOVSS
// Supported forms : (3 forms)
//
// * MOVSS xmm, xmm [SSE]
// * MOVSS m32, xmm [SSE]
// * MOVSS xmm, m32 [SSE]
//
func (self *Program) MOVSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSS", 2, Operands { v0, v1 })
    // MOVSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVSS xmm, m32
    if isXMM(v0) && isM32(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSS")
    }
    return p
}

// MOVSWL performs "Move with Sign-Extension".
//
// Mnemonic : MOVSX
// Supported forms : (2 forms)
//
// * MOVSWL r16, r32
// * MOVSWL m16, r32
//
func (self *Program) MOVSWL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSWL", 2, Operands { v0, v1 })
    // MOVSWL r16, r32
    if isReg16(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSWL m16, r32
    if isM16(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSWL")
    }
    return p
}

// MOVSWQ performs "Move with Sign-Extension".
//
// Mnemonic : MOVSX
// Supported forms : (2 forms)
//
// * MOVSWQ r16, r64
// * MOVSWQ m16, r64
//
func (self *Program) MOVSWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVSWQ", 2, Operands { v0, v1 })
    // MOVSWQ r16, r64
    if isReg16(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVSWQ m16, r64
    if isM16(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xbf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVSWQ")
    }
    return p
}

// MOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values".
//
// Mnemonic : MOVUPD
// Supported forms : (3 forms)
//
// * MOVUPD xmm, xmm [SSE2]
// * MOVUPD m128, xmm [SSE2]
// * MOVUPD xmm, m128 [SSE2]
//
func (self *Program) MOVUPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVUPD", 2, Operands { v0, v1 })
    // MOVUPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVUPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVUPD xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVUPD")
    }
    return p
}

// MOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values".
//
// Mnemonic : MOVUPS
// Supported forms : (3 forms)
//
// * MOVUPS xmm, xmm [SSE]
// * MOVUPS m128, xmm [SSE]
// * MOVUPS xmm, m128 [SSE]
//
func (self *Program) MOVUPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVUPS", 2, Operands { v0, v1 })
    // MOVUPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // MOVUPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVUPS xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVUPS")
    }
    return p
}

// MOVW performs "Move".
//
// Mnemonic : MOV
// Supported forms : (5 forms)
//
// * MOVW imm16, r16
// * MOVW r16, r16
// * MOVW m16, r16
// * MOVW imm16, m16
// * MOVW r16, m16
//
func (self *Program) MOVW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVW", 2, Operands { v0, v1 })
    // MOVW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xc7)
            m.emit(0xc0 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xb8 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // MOVW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x89)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x8b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // MOVW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc7)
            m.mrsd(0, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // MOVW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x89)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVW")
    }
    return p
}
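// NOTE (editorial sketch, not produced by mkasm_amd64.py): every MOVW form
// leads with m.emit(0x66), the operand-size override prefix. In 64-bit mode
// the default operand size is 32 bits, so 0x66 narrows the move to 16 bits;
// the opcode bytes (89, 8b, c7, b8+r) are otherwise identical to the MOVL
// forms earlier in this file. For example 66 89 c8 is "mov ax, cx", where
// 89 c8 alone would be "mov eax, ecx".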
// MOVZBL performs "Move with Zero-Extend".
//
// Mnemonic : MOVZX
// Supported forms : (2 forms)
//
// * MOVZBL r8, r32
// * MOVZBL m8, r32
//
func (self *Program) MOVZBL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVZBL", 2, Operands { v0, v1 })
    // MOVZBL r8, r32
    if isReg8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0xb6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVZBL m8, r32
    if isM8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xb6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVZBL")
    }
    return p
}

// MOVZBQ performs "Move with Zero-Extend".
//
// Mnemonic : MOVZX
// Supported forms : (2 forms)
//
// * MOVZBQ r8, r64
// * MOVZBQ m8, r64
//
func (self *Program) MOVZBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVZBQ", 2, Operands { v0, v1 })
    // MOVZBQ r8, r64
    if isReg8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xb6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVZBQ m8, r64
    if isM8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xb6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVZBQ")
    }
    return p
}

// MOVZBW performs "Move with Zero-Extend".
//
// Mnemonic : MOVZX
// Supported forms : (2 forms)
//
// * MOVZBW r8, r16
// * MOVZBW m8, r16
//
func (self *Program) MOVZBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVZBW", 2, Operands { v0, v1 })
    // MOVZBW r8, r16
    if isReg8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0xb6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVZBW m8, r16
    if isM8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xb6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVZBW")
    }
    return p
}

// MOVZWL performs "Move with Zero-Extend".
//
// Mnemonic : MOVZX
// Supported forms : (2 forms)
//
// * MOVZWL r16, r32
// * MOVZWL m16, r32
//
func (self *Program) MOVZWL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVZWL", 2, Operands { v0, v1 })
    // MOVZWL r16, r32
    if isReg16(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVZWL m16, r32
    if isM16(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xb7)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVZWL")
    }
    return p
}

// MOVZWQ performs "Move with Zero-Extend".
//
// Mnemonic : MOVZX
// Supported forms : (2 forms)
//
// * MOVZWQ r16, r64
// * MOVZWQ m16, r64
//
func (self *Program) MOVZWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MOVZWQ", 2, Operands { v0, v1 })
    // MOVZWQ r16, r64
    if isReg16(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MOVZWQ m16, r64
    if isM16(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xb7)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MOVZWQ")
    }
    return p
}
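// NOTE (editorial sketch, not produced by mkasm_amd64.py): the MOVZ* family
// (0F B6 for byte sources, 0F B7 for word sources) zero-extends into the
// wider destination. There is intentionally no MOVZLQ: writing any 32-bit
// register in 64-bit mode already clears bits 32..63, so a plain MOVL does
// that job. The Go-level equivalent of MOVZWQ is:
//
//     var src16 uint16 = 0xffff
//     dst64 := uint64(src16)   // upper 48 bits are zero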
// MPSADBW performs "Compute Multiple Packed Sums of Absolute Difference".
//
// Mnemonic : MPSADBW
// Supported forms : (2 forms)
//
// * MPSADBW imm8, xmm, xmm [SSE4.1]
// * MPSADBW imm8, m128, xmm [SSE4.1]
//
func (self *Program) MPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("MPSADBW", 3, Operands { v0, v1, v2 })
    // MPSADBW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // MPSADBW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x42)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for MPSADBW")
    }
    return p
}
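// NOTE (editorial sketch, not produced by mkasm_amd64.py): as with the other
// generated methods, MPSADBW takes its operands immediate-first and
// source-before-destination, so Intel's "MPSADBW xmm1, xmm2/m128, imm8"
// corresponds to MPSADBW(imm8, src, dst) here. Assuming this package's XMM
// register constants, a call would look like:
//
//     p.MPSADBW(0, XMM1, XMM0)   // Intel syntax: mpsadbw xmm0, xmm1, 0
//
// The immediate's low bits pick which 32-bit blocks of the two operands the
// eight absolute-difference sums are taken from (see the SSE4.1 reference
// for the exact bit assignment).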
// MULB performs "Unsigned Multiply".
//
// Mnemonic : MUL
// Supported forms : (2 forms)
//
// * MULB r8
// * MULB m8
//
func (self *Program) MULB(v0 interface{}) *Instruction {
    p := self.alloc("MULB", 1, Operands { v0 })
    // MULB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xf6)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // MULB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf6)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULB")
    }
    return p
}

// MULL performs "Unsigned Multiply".
//
// Mnemonic : MUL
// Supported forms : (2 forms)
//
// * MULL r32
// * MULL m32
//
func (self *Program) MULL(v0 interface{}) *Instruction {
    p := self.alloc("MULL", 1, Operands { v0 })
    // MULL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // MULL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULL")
    }
    return p
}

// MULPD performs "Multiply Packed Double-Precision Floating-Point Values".
//
// Mnemonic : MULPD
// Supported forms : (2 forms)
//
// * MULPD xmm, xmm [SSE2]
// * MULPD m128, xmm [SSE2]
//
func (self *Program) MULPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MULPD", 2, Operands { v0, v1 })
    // MULPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MULPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULPD")
    }
    return p
}

// MULPS performs "Multiply Packed Single-Precision Floating-Point Values".
//
// Mnemonic : MULPS
// Supported forms : (2 forms)
//
// * MULPS xmm, xmm [SSE]
// * MULPS m128, xmm [SSE]
//
func (self *Program) MULPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MULPS", 2, Operands { v0, v1 })
    // MULPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MULPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULPS")
    }
    return p
}

// MULQ performs "Unsigned Multiply".
//
// Mnemonic : MUL
// Supported forms : (2 forms)
//
// * MULQ r64
// * MULQ m64
//
func (self *Program) MULQ(v0 interface{}) *Instruction {
    p := self.alloc("MULQ", 1, Operands { v0 })
    // MULQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xf7)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // MULQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xf7)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULQ")
    }
    return p
}
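// NOTE (editorial sketch, not produced by mkasm_amd64.py): the one-operand
// MUL family multiplies by the accumulator implicitly, which is why only the
// source appears as an operand: MULQ src computes RDX:RAX = RAX * src. The
// 128-bit result semantics match math/bits in plain Go:
//
//     hi, lo := bits.Mul64(rax, src)   // hi -> RDX, lo -> RAX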
// MULSD performs "Multiply Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : MULSD
// Supported forms : (2 forms)
//
// * MULSD xmm, xmm [SSE2]
// * MULSD m64, xmm [SSE2]
//
func (self *Program) MULSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MULSD", 2, Operands { v0, v1 })
    // MULSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MULSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULSD")
    }
    return p
}

// MULSS performs "Multiply Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : MULSS
// Supported forms : (2 forms)
//
// * MULSS xmm, xmm [SSE]
// * MULSS m32, xmm [SSE]
//
func (self *Program) MULSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("MULSS", 2, Operands { v0, v1 })
    // MULSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // MULSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULSS")
    }
    return p
}

// MULW performs "Unsigned Multiply".
//
// Mnemonic : MUL
// Supported forms : (2 forms)
//
// * MULW r16
// * MULW m16
//
func (self *Program) MULW(v0 interface{}) *Instruction {
    p := self.alloc("MULW", 1, Operands { v0 })
    // MULW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // MULW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULW")
    }
    return p
}

// MULXL performs "Unsigned Multiply Without Affecting Flags".
//
// Mnemonic : MULX
// Supported forms : (2 forms)
//
// * MULXL r32, r32, r32 [BMI2]
// * MULXL m32, r32, r32 [BMI2]
//
func (self *Program) MULXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("MULXL", 3, Operands { v0, v1, v2 })
    // MULXL r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7b ^ (hlcode(v[1]) << 3))
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // MULXL m32, r32, r32
    if isM32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULXL")
    }
    return p
}

// MULXQ performs "Unsigned Multiply Without Affecting Flags".
//
// Mnemonic : MULX
// Supported forms : (2 forms)
//
// * MULXQ r64, r64, r64 [BMI2]
// * MULXQ m64, r64, r64 [BMI2]
//
func (self *Program) MULXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("MULXQ", 3, Operands { v0, v1, v2 })
    // MULXQ r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfb ^ (hlcode(v[1]) << 3))
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // MULXQ m64, r64, r64
    if isM64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for MULXQ")
    }
    return p
}
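// NOTE (editorial sketch, not produced by mkasm_amd64.py): MULX is the
// flag-preserving BMI2 multiply: it reads RDX implicitly, multiplies it by
// the source operand, and writes the low and high halves to two destination
// registers without touching EFLAGS, which is why these encoders emit a VEX
// prefix (the three-byte 0xc4 form above) rather than a legacy REX sequence.
// Reading the generated encoding (VEX.vvvv = v1, ModRM.reg = v2, rm = v0),
// MULXQ(src, lo, hi) appears to correspond to Intel's "MULX hi, lo, src";
// treat that operand mapping as an inference from the bytes, not documented
// API.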
// MWAIT performs "Monitor Wait".
//
// Mnemonic : MWAIT
// Supported forms : (1 form)
//
// * MWAIT [MONITOR]
//
func (self *Program) MWAIT() *Instruction {
    p := self.alloc("MWAIT", 0, Operands { })
    // MWAIT
    self.require(ISA_MONITOR)
    p.domain = DomainMisc
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x01)
        m.emit(0xc9)
    })
    return p
}

// MWAITX performs "Monitor Wait with Timeout".
//
// Mnemonic : MWAITX
// Supported forms : (1 form)
//
// * MWAITX [MONITORX]
//
func (self *Program) MWAITX() *Instruction {
    p := self.alloc("MWAITX", 0, Operands { })
    // MWAITX
    self.require(ISA_MONITORX)
    p.domain = DomainMisc
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x01)
        m.emit(0xfb)
    })
    return p
}

// NEGB performs "Two's Complement Negation".
//
// Mnemonic : NEG
// Supported forms : (2 forms)
//
// * NEGB r8
// * NEGB m8
//
func (self *Program) NEGB(v0 interface{}) *Instruction {
    p := self.alloc("NEGB", 1, Operands { v0 })
    // NEGB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xf6)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // NEGB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf6)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NEGB")
    }
    return p
}

// NEGL performs "Two's Complement Negation".
//
// Mnemonic : NEG
// Supported forms : (2 forms)
//
// * NEGL r32
// * NEGL m32
//
func (self *Program) NEGL(v0 interface{}) *Instruction {
    p := self.alloc("NEGL", 1, Operands { v0 })
    // NEGL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // NEGL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NEGL")
    }
    return p
}

// NEGQ performs "Two's Complement Negation".
//
// Mnemonic : NEG
// Supported forms : (2 forms)
//
// * NEGQ r64
// * NEGQ m64
//
func (self *Program) NEGQ(v0 interface{}) *Instruction {
    p := self.alloc("NEGQ", 1, Operands { v0 })
    // NEGQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xf7)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // NEGQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xf7)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NEGQ")
    }
    return p
}
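// NOTE (editorial sketch, not produced by mkasm_amd64.py): F6/F7 is a group
// opcode shared by several unary instructions; the ModRM reg field (the
// first argument to m.mrsd above, written "/digit" in Intel notation)
// selects which one. The digits used by the neighboring MUL*, NEG* and NOT*
// encoders line up with the manual:
//
//     /2 -> NOT      /3 -> NEG      /4 -> MUL (unsigned)
//
// so NEGQ m64 emits REX.W F7 /3 while NOTQ m64 emits REX.W F7 /2.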
// NEGW performs "Two's Complement Negation".
//
// Mnemonic : NEG
// Supported forms : (2 forms)
//
// * NEGW r16
// * NEGW m16
//
func (self *Program) NEGW(v0 interface{}) *Instruction {
    p := self.alloc("NEGW", 1, Operands { v0 })
    // NEGW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xd8 | lcode(v[0]))
        })
    }
    // NEGW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NEGW")
    }
    return p
}

// NOP performs "No Operation".
//
// Mnemonic : NOP
// Supported forms : (1 form)
//
// * NOP
//
func (self *Program) NOP() *Instruction {
    p := self.alloc("NOP", 0, Operands { })
    // NOP
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x90)
    })
    return p
}

// NOTB performs "One's Complement Negation".
//
// Mnemonic : NOT
// Supported forms : (2 forms)
//
// * NOTB r8
// * NOTB m8
//
func (self *Program) NOTB(v0 interface{}) *Instruction {
    p := self.alloc("NOTB", 1, Operands { v0 })
    // NOTB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0xf6)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // NOTB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf6)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NOTB")
    }
    return p
}

// NOTL performs "One's Complement Negation".
//
// Mnemonic : NOT
// Supported forms : (2 forms)
//
// * NOTL r32
// * NOTL m32
//
func (self *Program) NOTL(v0 interface{}) *Instruction {
    p := self.alloc("NOTL", 1, Operands { v0 })
    // NOTL r32
    if isReg32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // NOTL m32
    if isM32(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NOTL")
    }
    return p
}

// NOTQ performs "One's Complement Negation".
//
// Mnemonic : NOT
// Supported forms : (2 forms)
//
// * NOTQ r64
// * NOTQ m64
//
func (self *Program) NOTQ(v0 interface{}) *Instruction {
    p := self.alloc("NOTQ", 1, Operands { v0 })
    // NOTQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0xf7)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // NOTQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[0]))
            m.emit(0xf7)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NOTQ")
    }
    return p
}

// NOTW performs "One's Complement Negation".
//
// Mnemonic : NOT
// Supported forms : (2 forms)
//
// * NOTW r16
// * NOTW m16
//
func (self *Program) NOTW(v0 interface{}) *Instruction {
    p := self.alloc("NOTW", 1, Operands { v0 })
    // NOTW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0xf7)
            m.emit(0xd0 | lcode(v[0]))
        })
    }
    // NOTW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0xf7)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for NOTW")
    }
    return p
}

// ORB performs "Logical Inclusive OR".
//
// Mnemonic : OR
// Supported forms : (6 forms)
//
// * ORB imm8, al
// * ORB imm8, r8
// * ORB r8, r8
// * ORB m8, r8
// * ORB imm8, m8
// * ORB r8, m8
//
func (self *Program) ORB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ORB", 2, Operands { v0, v1 })
    // ORB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x0c)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ORB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x0a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ORB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(1, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x08)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ORB")
    }
    return p
}
|
|
|
|
// ORL performs "Logical Inclusive OR".
|
|
//
|
|
// Mnemonic : OR
|
|
// Supported forms : (8 forms)
|
|
//
|
|
// * ORL imm32, eax
|
|
// * ORL imm8, r32
|
|
// * ORL imm32, r32
|
|
// * ORL r32, r32
|
|
// * ORL m32, r32
|
|
// * ORL imm8, m32
|
|
// * ORL imm32, m32
|
|
// * ORL r32, m32
|
|
//
|
|
func (self *Program) ORL(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("ORL", 2, Operands { v0, v1 })
|
|
// ORL imm32, eax
|
|
if isImm32(v0) && v1 == EAX {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x0d)
|
|
m.imm4(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// ORL imm8, r32
|
|
if isImm8Ext(v0, 4) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, v[1], false)
|
|
m.emit(0x83)
|
|
m.emit(0xc8 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// ORL imm32, r32
|
|
if isImm32(v0) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, v[1], false)
|
|
m.emit(0x81)
|
|
m.emit(0xc8 | lcode(v[1]))
|
|
m.imm4(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// ORL r32, r32
|
|
if isReg32(v0) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[0]), v[1], false)
|
|
m.emit(0x09)
|
|
m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0b)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// ORL m32, r32
|
|
if isM32(v0) && isReg32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0b)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// ORL imm8, m32
|
|
if isImm8Ext(v0, 4) && isM32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, addr(v[1]), false)
|
|
m.emit(0x83)
|
|
m.mrsd(1, addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// ORL imm32, m32
|
|
if isImm32(v0) && isM32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(0, addr(v[1]), false)
|
|
m.emit(0x81)
|
|
m.mrsd(1, addr(v[1]), 1)
|
|
m.imm4(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// ORL r32, m32
|
|
if isReg32(v0) && isM32(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[0]), addr(v[1]), false)
|
|
m.emit(0x09)
|
|
m.mrsd(lcode(v[0]), addr(v[1]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for ORL")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// ORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : ORPD
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * ORPD xmm, xmm [SSE2]
|
|
// * ORPD m128, xmm [SSE2]
|
|
//
|
|
func (self *Program) ORPD(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("ORPD", 2, Operands { v0, v1 })
|
|
// ORPD xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x56)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// ORPD m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x56)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for ORPD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// ORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : ORPS
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * ORPS xmm, xmm [SSE]
|
|
// * ORPS m128, xmm [SSE]
|
|
//
|
|
func (self *Program) ORPS(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("ORPS", 2, Operands { v0, v1 })
|
|
// ORPS xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x56)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// ORPS m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x56)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for ORPS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
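
// Encoding note (comment only, not generated code): ORL picks the short
// 0x83 /1 ib form when the immediate fits in a sign-extended byte (the
// isImm8Ext check above) and the 0x81 /1 id form otherwise. Assuming a
// *Program value `p`:
//
//     p.ORL(7, ECX)      // 83 C9 07, 3 bytes
//     p.ORL(0x1234, ECX) // 81 C9 34 12 00 00, 6 bytes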

// ORQ performs "Logical Inclusive OR".
//
// Mnemonic : OR
// Supported forms : (8 forms)
//
// * ORQ imm32, rax
// * ORQ imm8, r64
// * ORQ imm32, r64
// * ORQ r64, r64
// * ORQ m64, r64
// * ORQ imm8, m64
// * ORQ imm32, m64
// * ORQ r64, m64
//
func (self *Program) ORQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ORQ", 2, Operands { v0, v1 })
    // ORQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x0d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ORQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xc8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // ORQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ORQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ORQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(1, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(1, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // ORQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x09)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ORQ")
    }
    return p
}

// ORW performs "Logical Inclusive OR".
//
// Mnemonic : OR
// Supported forms : (8 forms)
//
// * ORW imm16, ax
// * ORW imm8, r16
// * ORW imm16, r16
// * ORW r16, r16
// * ORW m16, r16
// * ORW imm8, m16
// * ORW imm16, m16
// * ORW r16, m16
//
func (self *Program) ORW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("ORW", 2, Operands { v0, v1 })
    // ORW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x0d)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ORW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xc8 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // ORW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // ORW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // ORW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(1, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // ORW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(1, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // ORW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x09)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for ORW")
    }
    return p
}
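
// Encoding note (comment only, not generated code): the 64-bit ORQ register
// forms build the REX byte inline: 0x48 sets REX.W, and hcode() contributes
// the REX.R/REX.B extension bits, so ORQ R9, RAX assembles to 4C 09 C8.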

// PABSB performs "Packed Absolute Value of Byte Integers".
//
// Mnemonic : PABSB
// Supported forms : (4 forms)
//
// * PABSB mm, mm [SSSE3]
// * PABSB m64, mm [SSSE3]
// * PABSB xmm, xmm [SSSE3]
// * PABSB m128, xmm [SSSE3]
//
func (self *Program) PABSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PABSB", 2, Operands { v0, v1 })
    // PABSB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PABSB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PABSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PABSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PABSB")
    }
    return p
}

// PABSD performs "Packed Absolute Value of Doubleword Integers".
//
// Mnemonic : PABSD
// Supported forms : (4 forms)
//
// * PABSD mm, mm [SSSE3]
// * PABSD m64, mm [SSSE3]
// * PABSD xmm, xmm [SSSE3]
// * PABSD m128, xmm [SSSE3]
//
func (self *Program) PABSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PABSD", 2, Operands { v0, v1 })
    // PABSD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PABSD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PABSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PABSD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PABSD")
    }
    return p
}

// PABSW performs "Packed Absolute Value of Word Integers".
//
// Mnemonic : PABSW
// Supported forms : (4 forms)
//
// * PABSW mm, mm [SSSE3]
// * PABSW m64, mm [SSSE3]
// * PABSW xmm, xmm [SSSE3]
// * PABSW m128, xmm [SSSE3]
//
func (self *Program) PABSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PABSW", 2, Operands { v0, v1 })
    // PABSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PABSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PABSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PABSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PABSW")
    }
    return p
}

// PACKSSDW performs "Pack Doublewords into Words with Signed Saturation".
//
// Mnemonic : PACKSSDW
// Supported forms : (4 forms)
//
// * PACKSSDW mm, mm [MMX]
// * PACKSSDW m64, mm [MMX]
// * PACKSSDW xmm, xmm [SSE2]
// * PACKSSDW m128, xmm [SSE2]
//
func (self *Program) PACKSSDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PACKSSDW", 2, Operands { v0, v1 })
    // PACKSSDW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKSSDW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PACKSSDW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKSSDW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PACKSSDW")
    }
    return p
}

// PACKSSWB performs "Pack Words into Bytes with Signed Saturation".
//
// Mnemonic : PACKSSWB
// Supported forms : (4 forms)
//
// * PACKSSWB mm, mm [MMX]
// * PACKSSWB m64, mm [MMX]
// * PACKSSWB xmm, xmm [SSE2]
// * PACKSSWB m128, xmm [SSE2]
//
func (self *Program) PACKSSWB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PACKSSWB", 2, Operands { v0, v1 })
    // PACKSSWB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKSSWB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x63)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PACKSSWB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKSSWB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x63)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PACKSSWB")
    }
    return p
}

// PACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation".
//
// Mnemonic : PACKUSDW
// Supported forms : (2 forms)
//
// * PACKUSDW xmm, xmm [SSE4.1]
// * PACKUSDW m128, xmm [SSE4.1]
//
func (self *Program) PACKUSDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PACKUSDW", 2, Operands { v0, v1 })
    // PACKUSDW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKUSDW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x2b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PACKUSDW")
    }
    return p
}

// PACKUSWB performs "Pack Words into Bytes with Unsigned Saturation".
//
// Mnemonic : PACKUSWB
// Supported forms : (4 forms)
//
// * PACKUSWB mm, mm [MMX]
// * PACKUSWB m64, mm [MMX]
// * PACKUSWB xmm, xmm [SSE2]
// * PACKUSWB m128, xmm [SSE2]
//
func (self *Program) PACKUSWB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PACKUSWB", 2, Operands { v0, v1 })
    // PACKUSWB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKUSWB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x67)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PACKUSWB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PACKUSWB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x67)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PACKUSWB")
    }
    return p
}
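
// Semantics note (comment only, not generated code): the PACK* family narrows
// each source lane with saturation rather than truncation. For PACKUSWB, a
// signed word of 0x0150 (336) packs to 0xFF, and 0xFF80 (-128) packs to 0x00;
// the MMX and SSE2 encodings above differ only in the 0x66 prefix.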

// PADDB performs "Add Packed Byte Integers".
//
// Mnemonic : PADDB
// Supported forms : (4 forms)
//
// * PADDB mm, mm [MMX]
// * PADDB m64, mm [MMX]
// * PADDB xmm, xmm [SSE2]
// * PADDB m128, xmm [SSE2]
//
func (self *Program) PADDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDB", 2, Operands { v0, v1 })
    // PADDB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDB")
    }
    return p
}

// PADDD performs "Add Packed Doubleword Integers".
//
// Mnemonic : PADDD
// Supported forms : (4 forms)
//
// * PADDD mm, mm [MMX]
// * PADDD m64, mm [MMX]
// * PADDD xmm, xmm [SSE2]
// * PADDD m128, xmm [SSE2]
//
func (self *Program) PADDD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDD", 2, Operands { v0, v1 })
    // PADDD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfe)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfe)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDD")
    }
    return p
}

// PADDQ performs "Add Packed Quadword Integers".
//
// Mnemonic : PADDQ
// Supported forms : (4 forms)
//
// * PADDQ mm, mm [SSE2]
// * PADDQ m64, mm [SSE2]
// * PADDQ xmm, xmm [SSE2]
// * PADDQ m128, xmm [SSE2]
//
func (self *Program) PADDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDQ", 2, Operands { v0, v1 })
    // PADDQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd4)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd4)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDQ")
    }
    return p
}

// PADDSB performs "Add Packed Signed Byte Integers with Signed Saturation".
//
// Mnemonic : PADDSB
// Supported forms : (4 forms)
//
// * PADDSB mm, mm [MMX]
// * PADDSB m64, mm [MMX]
// * PADDSB xmm, xmm [SSE2]
// * PADDSB m128, xmm [SSE2]
//
func (self *Program) PADDSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDSB", 2, Operands { v0, v1 })
    // PADDSB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDSB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xec)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xec)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDSB")
    }
    return p
}

// PADDSW performs "Add Packed Signed Word Integers with Signed Saturation".
//
// Mnemonic : PADDSW
// Supported forms : (4 forms)
//
// * PADDSW mm, mm [MMX]
// * PADDSW m64, mm [MMX]
// * PADDSW xmm, xmm [SSE2]
// * PADDSW m128, xmm [SSE2]
//
func (self *Program) PADDSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDSW", 2, Operands { v0, v1 })
    // PADDSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xed)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xed)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDSW")
    }
    return p
}

// PADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation".
//
// Mnemonic : PADDUSB
// Supported forms : (4 forms)
//
// * PADDUSB mm, mm [MMX]
// * PADDUSB m64, mm [MMX]
// * PADDUSB xmm, xmm [SSE2]
// * PADDUSB m128, xmm [SSE2]
//
func (self *Program) PADDUSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDUSB", 2, Operands { v0, v1 })
    // PADDUSB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDUSB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDUSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDUSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDUSB")
    }
    return p
}

// PADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation".
//
// Mnemonic : PADDUSW
// Supported forms : (4 forms)
//
// * PADDUSW mm, mm [MMX]
// * PADDUSW m64, mm [MMX]
// * PADDUSW xmm, xmm [SSE2]
// * PADDUSW m128, xmm [SSE2]
//
func (self *Program) PADDUSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDUSW", 2, Operands { v0, v1 })
    // PADDUSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDUSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDUSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDUSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDUSW")
    }
    return p
}

// PADDW performs "Add Packed Word Integers".
//
// Mnemonic : PADDW
// Supported forms : (4 forms)
//
// * PADDW mm, mm [MMX]
// * PADDW m64, mm [MMX]
// * PADDW xmm, xmm [SSE2]
// * PADDW m128, xmm [SSE2]
//
func (self *Program) PADDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PADDW", 2, Operands { v0, v1 })
    // PADDW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PADDW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PADDW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PADDW")
    }
    return p
}

// PALIGNR performs "Packed Align Right".
//
// Mnemonic : PALIGNR
// Supported forms : (4 forms)
//
// * PALIGNR imm8, mm, mm [SSSE3]
// * PALIGNR imm8, m64, mm [SSSE3]
// * PALIGNR imm8, xmm, xmm [SSSE3]
// * PALIGNR imm8, m128, xmm [SSSE3]
//
func (self *Program) PALIGNR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PALIGNR", 3, Operands { v0, v1, v2 })
    // PALIGNR imm8, mm, mm
    if isImm8(v0) && isMM(v1) && isMM(v2) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PALIGNR imm8, m64, mm
    if isImm8(v0) && isM64(v1) && isMM(v2) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0f)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // PALIGNR imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PALIGNR imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0f)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PALIGNR")
    }
    return p
}
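
// Usage sketch (illustrative comment only; assumes a *Program value `p`):
// PALIGNR concatenates destination:source, shifts right by imm8 bytes, and
// keeps the low 16 bytes, a common idiom for unaligned reads spanning two
// registers:
//
//     p.PALIGNR(4, XMM1, XMM2) // xmm2 = bytes 4..19 of xmm2:xmm1 (xmm1 low)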

// PAND performs "Packed Bitwise Logical AND".
//
// Mnemonic : PAND
// Supported forms : (4 forms)
//
// * PAND mm, mm [MMX]
// * PAND m64, mm [MMX]
// * PAND xmm, xmm [SSE2]
// * PAND m128, xmm [SSE2]
//
func (self *Program) PAND(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PAND", 2, Operands { v0, v1 })
    // PAND mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PAND m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PAND xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PAND m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PAND")
    }
    return p
}

// PANDN performs "Packed Bitwise Logical AND NOT".
//
// Mnemonic : PANDN
// Supported forms : (4 forms)
//
// * PANDN mm, mm [MMX]
// * PANDN m64, mm [MMX]
// * PANDN xmm, xmm [SSE2]
// * PANDN m128, xmm [SSE2]
//
func (self *Program) PANDN(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PANDN", 2, Operands { v0, v1 })
    // PANDN mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PANDN m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PANDN xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PANDN m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xdf)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PANDN")
    }
    return p
}

// PAUSE performs "Spin Loop Hint".
//
// Mnemonic : PAUSE
// Supported forms : (1 form)
//
// * PAUSE
//
func (self *Program) PAUSE() *Instruction {
    p := self.alloc("PAUSE", 0, Operands { })
    // PAUSE
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xf3)
        m.emit(0x90)
    })
    return p
}
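
// Usage note (comment only, not generated code): PAUSE encodes as F3 90, a
// prefixed NOP, so it degrades to a plain NOP on CPUs without the spin-loop
// hint; that is why no self.require(...) guard is emitted for it above.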

// PAVGB performs "Average Packed Byte Integers".
//
// Mnemonic : PAVGB
// Supported forms : (4 forms)
//
// * PAVGB mm, mm [MMX+]
// * PAVGB m64, mm [MMX+]
// * PAVGB xmm, xmm [SSE2]
// * PAVGB m128, xmm [SSE2]
//
func (self *Program) PAVGB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PAVGB", 2, Operands { v0, v1 })
    // PAVGB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PAVGB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PAVGB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PAVGB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PAVGB")
    }
    return p
}

// PAVGUSB performs "Average Packed Byte Integers".
//
// Mnemonic : PAVGUSB
// Supported forms : (2 forms)
//
// * PAVGUSB mm, mm [3dnow!]
// * PAVGUSB m64, mm [3dnow!]
//
func (self *Program) PAVGUSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PAVGUSB", 2, Operands { v0, v1 })
    // PAVGUSB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xbf)
        })
    }
    // PAVGUSB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xbf)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PAVGUSB")
    }
    return p
}

// PAVGW performs "Average Packed Word Integers".
//
// Mnemonic : PAVGW
// Supported forms : (4 forms)
//
// * PAVGW mm, mm [MMX+]
// * PAVGW m64, mm [MMX+]
// * PAVGW xmm, xmm [SSE2]
// * PAVGW m128, xmm [SSE2]
//
func (self *Program) PAVGW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PAVGW", 2, Operands { v0, v1 })
    // PAVGW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PAVGW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PAVGW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PAVGW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PAVGW")
    }
    return p
}

// PBLENDVB performs "Variable Blend Packed Bytes".
//
// Mnemonic : PBLENDVB
// Supported forms : (2 forms)
//
// * PBLENDVB xmm0, xmm, xmm [SSE4.1]
// * PBLENDVB xmm0, m128, xmm [SSE4.1]
//
func (self *Program) PBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PBLENDVB", 3, Operands { v0, v1, v2 })
    // PBLENDVB xmm0, xmm, xmm
    if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // PBLENDVB xmm0, m128, xmm
    if v0 == XMM0 && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x10)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PBLENDVB")
    }
    return p
}
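
// Encoding note (comment only, not generated code): PBLENDVB reads its blend
// mask from the hard-wired XMM0 register, which the v0 == XMM0 checks above
// enforce; the mask operand never appears in the emitted bytes and exists
// only for operand validation.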

// PBLENDW performs "Blend Packed Words".
//
// Mnemonic : PBLENDW
// Supported forms : (2 forms)
//
// * PBLENDW imm8, xmm, xmm [SSE4.1]
// * PBLENDW imm8, m128, xmm [SSE4.1]
//
func (self *Program) PBLENDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PBLENDW", 3, Operands { v0, v1, v2 })
    // PBLENDW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PBLENDW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x0e)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PBLENDW")
    }
    return p
}

// PCLMULQDQ performs "Carry-Less Quadword Multiplication".
//
// Mnemonic : PCLMULQDQ
// Supported forms : (2 forms)
//
// * PCLMULQDQ imm8, xmm, xmm [PCLMULQDQ]
// * PCLMULQDQ imm8, m128, xmm [PCLMULQDQ]
//
func (self *Program) PCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PCLMULQDQ", 3, Operands { v0, v1, v2 })
    // PCLMULQDQ imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_PCLMULQDQ)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PCLMULQDQ imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_PCLMULQDQ)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x44)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCLMULQDQ")
    }
    return p
}
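
// Usage note (comment only, not generated code): the PCLMULQDQ imm8 selects
// which 64-bit half of each operand is multiplied (one bit per operand, bits
// 0 and 4), so 0x00 multiplies the two low quadwords and 0x11 the two high
// ones; this carry-less multiply is the primitive behind CRC folding and
// GHASH implementations.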

// PCMPEQB performs "Compare Packed Byte Data for Equality".
//
// Mnemonic : PCMPEQB
// Supported forms : (4 forms)
//
// * PCMPEQB mm, mm [MMX]
// * PCMPEQB m64, mm [MMX]
// * PCMPEQB xmm, xmm [SSE2]
// * PCMPEQB m128, xmm [SSE2]
//
func (self *Program) PCMPEQB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPEQB", 2, Operands { v0, v1 })
    // PCMPEQB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x74)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PCMPEQB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x74)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPEQB")
    }
    return p
}

// PCMPEQD performs "Compare Packed Doubleword Data for Equality".
//
// Mnemonic : PCMPEQD
// Supported forms : (4 forms)
//
// * PCMPEQD mm, mm [MMX]
// * PCMPEQD m64, mm [MMX]
// * PCMPEQD xmm, xmm [SSE2]
// * PCMPEQD m128, xmm [SSE2]
//
func (self *Program) PCMPEQD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPEQD", 2, Operands { v0, v1 })
    // PCMPEQD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x76)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PCMPEQD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x76)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPEQD")
    }
    return p
}

// PCMPEQQ performs "Compare Packed Quadword Data for Equality".
//
// Mnemonic : PCMPEQQ
// Supported forms : (2 forms)
//
// * PCMPEQQ xmm, xmm [SSE4.1]
// * PCMPEQQ m128, xmm [SSE4.1]
//
func (self *Program) PCMPEQQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPEQQ", 2, Operands { v0, v1 })
    // PCMPEQQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x29)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPEQQ")
    }
    return p
}

// PCMPEQW performs "Compare Packed Word Data for Equality".
//
// Mnemonic : PCMPEQW
// Supported forms : (4 forms)
//
// * PCMPEQW mm, mm [MMX]
// * PCMPEQW m64, mm [MMX]
// * PCMPEQW xmm, xmm [SSE2]
// * PCMPEQW m128, xmm [SSE2]
//
func (self *Program) PCMPEQW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPEQW", 2, Operands { v0, v1 })
    // PCMPEQW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x75)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PCMPEQW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPEQW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x75)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPEQW")
    }
    return p
}
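
// Semantics note (comment only, not generated code): the PCMPEQ* family
// writes all-ones into each lane that compares equal and all-zeros otherwise,
// so the result doubles as a lane mask; PCMPEQB against a zeroed register is
// a common way to locate NUL bytes when scanning strings.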
// PCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index".
//
// Mnemonic : PCMPESTRI
// Supported forms : (2 forms)
//
// * PCMPESTRI imm8, xmm, xmm [SSE4.2]
// * PCMPESTRI imm8, m128, xmm [SSE4.2]
//
func (self *Program) PCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PCMPESTRI", 3, Operands { v0, v1, v2 })
    // PCMPESTRI imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PCMPESTRI imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPESTRI")
    }
    return p
}

// PCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask".
//
// Mnemonic : PCMPESTRM
// Supported forms : (2 forms)
//
// * PCMPESTRM imm8, xmm, xmm [SSE4.2]
// * PCMPESTRM imm8, m128, xmm [SSE4.2]
//
func (self *Program) PCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PCMPESTRM", 3, Operands { v0, v1, v2 })
    // PCMPESTRM imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PCMPESTRM imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPESTRM")
    }
    return p
}

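// Note on the explicit-length string compares above: PCMPESTRI/PCMPESTRM
// take their string lengths implicitly in EAX and EDX, and the imm8 control
// byte selects element size, comparison mode and result polarity; PCMPESTRI
// returns its index in ECX, while PCMPESTRM returns a mask in XMM0. A minimal
// sketch, assuming the register constants defined by this package:
//
//     p.PCMPESTRI(0x0c, XMM1, XMM0)   // equal-ordered, unsigned bytes; index -> ECX
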
// PCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than".
//
// Mnemonic : PCMPGTB
// Supported forms : (4 forms)
//
// * PCMPGTB mm, mm [MMX]
// * PCMPGTB m64, mm [MMX]
// * PCMPGTB xmm, xmm [SSE2]
// * PCMPGTB m128, xmm [SSE2]
//
func (self *Program) PCMPGTB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPGTB", 2, Operands { v0, v1 })
    // PCMPGTB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x64)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PCMPGTB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x64)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPGTB")
    }
    return p
}

// PCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than".
//
// Mnemonic : PCMPGTD
// Supported forms : (4 forms)
//
// * PCMPGTD mm, mm [MMX]
// * PCMPGTD m64, mm [MMX]
// * PCMPGTD xmm, xmm [SSE2]
// * PCMPGTD m128, xmm [SSE2]
//
func (self *Program) PCMPGTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPGTD", 2, Operands { v0, v1 })
    // PCMPGTD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x66)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PCMPGTD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x66)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPGTD")
    }
    return p
}

// PCMPGTQ performs "Compare Packed Data for Greater Than".
//
// Mnemonic : PCMPGTQ
// Supported forms : (2 forms)
//
// * PCMPGTQ xmm, xmm [SSE4.2]
// * PCMPGTQ m128, xmm [SSE4.2]
//
func (self *Program) PCMPGTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPGTQ", 2, Operands { v0, v1 })
    // PCMPGTQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x37)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x37)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPGTQ")
    }
    return p
}

// PCMPGTW performs "Compare Packed Signed Word Integers for Greater Than".
//
// Mnemonic : PCMPGTW
// Supported forms : (4 forms)
//
// * PCMPGTW mm, mm [MMX]
// * PCMPGTW m64, mm [MMX]
// * PCMPGTW xmm, xmm [SSE2]
// * PCMPGTW m128, xmm [SSE2]
//
func (self *Program) PCMPGTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PCMPGTW", 2, Operands { v0, v1 })
    // PCMPGTW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x65)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PCMPGTW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PCMPGTW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x65)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPGTW")
    }
    return p
}

// PCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index".
//
// Mnemonic : PCMPISTRI
// Supported forms : (2 forms)
//
// * PCMPISTRI imm8, xmm, xmm [SSE4.2]
// * PCMPISTRI imm8, m128, xmm [SSE4.2]
//
func (self *Program) PCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PCMPISTRI", 3, Operands { v0, v1, v2 })
    // PCMPISTRI imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PCMPISTRI imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPISTRI")
    }
    return p
}

// PCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask".
//
// Mnemonic : PCMPISTRM
// Supported forms : (2 forms)
//
// * PCMPISTRM imm8, xmm, xmm [SSE4.2]
// * PCMPISTRM imm8, m128, xmm [SSE4.2]
//
func (self *Program) PCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PCMPISTRM", 3, Operands { v0, v1, v2 })
    // PCMPISTRM imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PCMPISTRM imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE4_2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PCMPISTRM")
    }
    return p
}

// PDEP performs "Parallel Bits Deposit".
//
// Mnemonic : PDEP
// Supported forms : (4 forms)
//
// * PDEP r32, r32, r32 [BMI2]
// * PDEP m32, r32, r32 [BMI2]
// * PDEP r64, r64, r64 [BMI2]
// * PDEP m64, r64, r64 [BMI2]
//
func (self *Program) PDEP(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PDEP", 3, Operands { v0, v1, v2 })
    // PDEP r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7b ^ (hlcode(v[1]) << 3))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // PDEP m32, r32, r32
    if isM32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // PDEP r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfb ^ (hlcode(v[1]) << 3))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // PDEP m64, r64, r64
    if isM64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PDEP")
    }
    return p
}

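// For reference, PDEP deposits the low-order bits of the source into the bit
// positions selected by the mask, from least-significant mask bit upward. A
// pure-Go model of the 64-bit form (illustrative only, not used by the
// encoder):
//
//     func pdep64(src, mask uint64) (r uint64) {
//         for bit := uint64(1); mask != 0; bit <<= 1 {
//             lowest := mask & -mask // lowest set bit of the mask
//             if src&bit != 0 {
//                 r |= lowest
//             }
//             mask &^= lowest
//         }
//         return r
//     }
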
// PEXT performs "Parallel Bits Extract".
//
// Mnemonic : PEXT
// Supported forms : (4 forms)
//
// * PEXT r32, r32, r32 [BMI2]
// * PEXT m32, r32, r32 [BMI2]
// * PEXT r64, r64, r64 [BMI2]
// * PEXT m64, r64, r64 [BMI2]
//
func (self *Program) PEXT(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PEXT", 3, Operands { v0, v1, v2 })
    // PEXT r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7a ^ (hlcode(v[1]) << 3))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // PEXT m32, r32, r32
    if isM32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // PEXT r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfa ^ (hlcode(v[1]) << 3))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // PEXT m64, r64, r64
    if isM64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PEXT")
    }
    return p
}

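// PEXT is the inverse operation of PDEP: it gathers the bits selected by the
// mask and packs them contiguously at the low end of the result. A pure-Go
// model of the 64-bit form (illustrative only):
//
//     func pext64(src, mask uint64) (r uint64) {
//         for bit := uint64(1); mask != 0; bit <<= 1 {
//             lowest := mask & -mask // lowest set bit of the mask
//             if src&lowest != 0 {
//                 r |= bit
//             }
//             mask &^= lowest
//         }
//         return r
//     }
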
// PEXTRB performs "Extract Byte".
//
// Mnemonic : PEXTRB
// Supported forms : (2 forms)
//
// * PEXTRB imm8, xmm, r32 [SSE4.1]
// * PEXTRB imm8, xmm, m8 [SSE4.1]
//
func (self *Program) PEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PEXTRB", 3, Operands { v0, v1, v2 })
    // PEXTRB imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PEXTRB imm8, xmm, m8
    if isImm8(v0) && isXMM(v1) && isM8(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x14)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PEXTRB")
    }
    return p
}

// PEXTRD performs "Extract Doubleword".
//
// Mnemonic : PEXTRD
// Supported forms : (2 forms)
//
// * PEXTRD imm8, xmm, r32 [SSE4.1]
// * PEXTRD imm8, xmm, m32 [SSE4.1]
//
func (self *Program) PEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PEXTRD", 3, Operands { v0, v1, v2 })
    // PEXTRD imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PEXTRD imm8, xmm, m32
    if isImm8(v0) && isXMM(v1) && isM32(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PEXTRD")
    }
    return p
}

// PEXTRQ performs "Extract Quadword".
//
// Mnemonic : PEXTRQ
// Supported forms : (2 forms)
//
// * PEXTRQ imm8, xmm, r64 [SSE4.1]
// * PEXTRQ imm8, xmm, m64 [SSE4.1]
//
func (self *Program) PEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PEXTRQ", 3, Operands { v0, v1, v2 })
    // PEXTRQ imm8, xmm, r64
    if isImm8(v0) && isXMM(v1) && isReg64(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PEXTRQ imm8, xmm, m64
    if isImm8(v0) && isXMM(v1) && isM64(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexm(1, hcode(v[1]), addr(v[2]))
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PEXTRQ")
    }
    return p
}

// PEXTRW performs "Extract Word".
//
// Mnemonic : PEXTRW
// Supported forms : (3 forms)
//
// * PEXTRW imm8, mm, r32 [MMX+]
// * PEXTRW imm8, xmm, r32 [SSE4.1]
// * PEXTRW imm8, xmm, m16 [SSE4.1]
//
func (self *Program) PEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PEXTRW", 3, Operands { v0, v1, v2 })
    // PEXTRW imm8, mm, r32
    if isImm8(v0) && isMM(v1) && isReg32(v2) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PEXTRW imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PEXTRW imm8, xmm, m16
    if isImm8(v0) && isXMM(v1) && isM16(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x15)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PEXTRW")
    }
    return p
}

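// Note: for the register form PEXTRW imm8, xmm, r32 the method above registers
// two equivalent encodings (the SSE4.1 0F 3A 15 form and the legacy 0F C5
// form); the encoder may emit either byte sequence when the program is
// assembled, typically preferring the shorter one.
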
// PF2ID performs "Packed Floating-Point to Integer Doubleword Conversion".
//
// Mnemonic : PF2ID
// Supported forms : (2 forms)
//
// * PF2ID mm, mm [3dnow!]
// * PF2ID m64, mm [3dnow!]
//
func (self *Program) PF2ID(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PF2ID", 2, Operands { v0, v1 })
    // PF2ID mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x1d)
        })
    }
    // PF2ID m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x1d)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PF2ID")
    }
    return p
}

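// Encoding note for the 3DNow! family in this file: 3DNow! instructions share
// the 0F 0F escape and carry their actual opcode as a trailing suffix byte
// after the ModRM/SIB/displacement bytes, which is why these encoders emit
// m.emit(0x0f) twice up front and the instruction-specific byte (0x1d for
// PF2ID, 0x1c for PF2IW, and so on) last.
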
// PF2IW performs "Packed Floating-Point to Integer Word Conversion".
//
// Mnemonic : PF2IW
// Supported forms : (2 forms)
//
// * PF2IW mm, mm [3dnow!+]
// * PF2IW m64, mm [3dnow!+]
//
func (self *Program) PF2IW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PF2IW", 2, Operands { v0, v1 })
    // PF2IW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x1c)
        })
    }
    // PF2IW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x1c)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PF2IW")
    }
    return p
}

// PFACC performs "Packed Floating-Point Accumulate".
//
// Mnemonic : PFACC
// Supported forms : (2 forms)
//
// * PFACC mm, mm [3dnow!]
// * PFACC m64, mm [3dnow!]
//
func (self *Program) PFACC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFACC", 2, Operands { v0, v1 })
    // PFACC mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xae)
        })
    }
    // PFACC m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xae)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFACC")
    }
    return p
}

// PFADD performs "Packed Floating-Point Add".
//
// Mnemonic : PFADD
// Supported forms : (2 forms)
//
// * PFADD mm, mm [3dnow!]
// * PFADD m64, mm [3dnow!]
//
func (self *Program) PFADD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFADD", 2, Operands { v0, v1 })
    // PFADD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x9e)
        })
    }
    // PFADD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x9e)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFADD")
    }
    return p
}

// PFCMPEQ performs "Packed Floating-Point Compare for Equal".
//
// Mnemonic : PFCMPEQ
// Supported forms : (2 forms)
//
// * PFCMPEQ mm, mm [3dnow!]
// * PFCMPEQ m64, mm [3dnow!]
//
func (self *Program) PFCMPEQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFCMPEQ", 2, Operands { v0, v1 })
    // PFCMPEQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xb0)
        })
    }
    // PFCMPEQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xb0)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFCMPEQ")
    }
    return p
}

// PFCMPGE performs "Packed Floating-Point Compare for Greater or Equal".
//
// Mnemonic : PFCMPGE
// Supported forms : (2 forms)
//
// * PFCMPGE mm, mm [3dnow!]
// * PFCMPGE m64, mm [3dnow!]
//
func (self *Program) PFCMPGE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFCMPGE", 2, Operands { v0, v1 })
    // PFCMPGE mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x90)
        })
    }
    // PFCMPGE m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x90)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFCMPGE")
    }
    return p
}

// PFCMPGT performs "Packed Floating-Point Compare for Greater Than".
//
// Mnemonic : PFCMPGT
// Supported forms : (2 forms)
//
// * PFCMPGT mm, mm [3dnow!]
// * PFCMPGT m64, mm [3dnow!]
//
func (self *Program) PFCMPGT(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFCMPGT", 2, Operands { v0, v1 })
    // PFCMPGT mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xa0)
        })
    }
    // PFCMPGT m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xa0)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFCMPGT")
    }
    return p
}

// PFMAX performs "Packed Floating-Point Maximum".
//
// Mnemonic : PFMAX
// Supported forms : (2 forms)
//
// * PFMAX mm, mm [3dnow!]
// * PFMAX m64, mm [3dnow!]
//
func (self *Program) PFMAX(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFMAX", 2, Operands { v0, v1 })
    // PFMAX mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xa4)
        })
    }
    // PFMAX m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xa4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFMAX")
    }
    return p
}

// PFMIN performs "Packed Floating-Point Minimum".
//
// Mnemonic : PFMIN
// Supported forms : (2 forms)
//
// * PFMIN mm, mm [3dnow!]
// * PFMIN m64, mm [3dnow!]
//
func (self *Program) PFMIN(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFMIN", 2, Operands { v0, v1 })
    // PFMIN mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x94)
        })
    }
    // PFMIN m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x94)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFMIN")
    }
    return p
}

// PFMUL performs "Packed Floating-Point Multiply".
//
// Mnemonic : PFMUL
// Supported forms : (2 forms)
//
// * PFMUL mm, mm [3dnow!]
// * PFMUL m64, mm [3dnow!]
//
func (self *Program) PFMUL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFMUL", 2, Operands { v0, v1 })
    // PFMUL mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xb4)
        })
    }
    // PFMUL m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xb4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFMUL")
    }
    return p
}

// PFNACC performs "Packed Floating-Point Negative Accumulate".
//
// Mnemonic : PFNACC
// Supported forms : (2 forms)
//
// * PFNACC mm, mm [3dnow!+]
// * PFNACC m64, mm [3dnow!+]
//
func (self *Program) PFNACC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFNACC", 2, Operands { v0, v1 })
    // PFNACC mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x8a)
        })
    }
    // PFNACC m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x8a)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFNACC")
    }
    return p
}

// PFPNACC performs "Packed Floating-Point Positive-Negative Accumulate".
//
// Mnemonic : PFPNACC
// Supported forms : (2 forms)
//
// * PFPNACC mm, mm [3dnow!+]
// * PFPNACC m64, mm [3dnow!+]
//
func (self *Program) PFPNACC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFPNACC", 2, Operands { v0, v1 })
    // PFPNACC mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x8e)
        })
    }
    // PFPNACC m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x8e)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFPNACC")
    }
    return p
}

// PFRCP performs "Packed Floating-Point Reciprocal Approximation".
//
// Mnemonic : PFRCP
// Supported forms : (2 forms)
//
// * PFRCP mm, mm [3dnow!]
// * PFRCP m64, mm [3dnow!]
//
func (self *Program) PFRCP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFRCP", 2, Operands { v0, v1 })
    // PFRCP mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x96)
        })
    }
    // PFRCP m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x96)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFRCP")
    }
    return p
}

// PFRCPIT1 performs "Packed Floating-Point Reciprocal Iteration 1".
//
// Mnemonic : PFRCPIT1
// Supported forms : (2 forms)
//
// * PFRCPIT1 mm, mm [3dnow!]
// * PFRCPIT1 m64, mm [3dnow!]
//
func (self *Program) PFRCPIT1(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFRCPIT1", 2, Operands { v0, v1 })
    // PFRCPIT1 mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xa6)
        })
    }
    // PFRCPIT1 m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xa6)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFRCPIT1")
    }
    return p
}

// PFRCPIT2 performs "Packed Floating-Point Reciprocal Iteration 2".
//
// Mnemonic : PFRCPIT2
// Supported forms : (2 forms)
//
// * PFRCPIT2 mm, mm [3dnow!]
// * PFRCPIT2 m64, mm [3dnow!]
//
func (self *Program) PFRCPIT2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFRCPIT2", 2, Operands { v0, v1 })
    // PFRCPIT2 mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xb6)
        })
    }
    // PFRCPIT2 m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xb6)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFRCPIT2")
    }
    return p
}

// PFRSQIT1 performs "Packed Floating-Point Reciprocal Square Root Iteration 1".
//
// Mnemonic : PFRSQIT1
// Supported forms : (2 forms)
//
// * PFRSQIT1 mm, mm [3dnow!]
// * PFRSQIT1 m64, mm [3dnow!]
//
func (self *Program) PFRSQIT1(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFRSQIT1", 2, Operands { v0, v1 })
    // PFRSQIT1 mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xa7)
        })
    }
    // PFRSQIT1 m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xa7)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFRSQIT1")
    }
    return p
}

// PFRSQRT performs "Packed Floating-Point Reciprocal Square Root Approximation".
//
// Mnemonic : PFRSQRT
// Supported forms : (2 forms)
//
// * PFRSQRT mm, mm [3dnow!]
// * PFRSQRT m64, mm [3dnow!]
//
func (self *Program) PFRSQRT(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFRSQRT", 2, Operands { v0, v1 })
    // PFRSQRT mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x97)
        })
    }
    // PFRSQRT m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x97)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFRSQRT")
    }
    return p
}

// PFSUB performs "Packed Floating-Point Subtract".
//
// Mnemonic : PFSUB
// Supported forms : (2 forms)
//
// * PFSUB mm, mm [3dnow!]
// * PFSUB m64, mm [3dnow!]
//
func (self *Program) PFSUB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFSUB", 2, Operands { v0, v1 })
    // PFSUB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x9a)
        })
    }
    // PFSUB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x9a)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFSUB")
    }
    return p
}

// PFSUBR performs "Packed Floating-Point Subtract Reverse".
//
// Mnemonic : PFSUBR
// Supported forms : (2 forms)
//
// * PFSUBR mm, mm [3dnow!]
// * PFSUBR m64, mm [3dnow!]
//
func (self *Program) PFSUBR(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PFSUBR", 2, Operands { v0, v1 })
    // PFSUBR mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xaa)
        })
    }
    // PFSUBR m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xaa)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PFSUBR")
    }
    return p
}

// PHADDD performs "Packed Horizontal Add Doubleword Integer".
//
// Mnemonic : PHADDD
// Supported forms : (4 forms)
//
// * PHADDD mm, mm [SSSE3]
// * PHADDD m64, mm [SSSE3]
// * PHADDD xmm, xmm [SSSE3]
// * PHADDD m128, xmm [SSSE3]
//
func (self *Program) PHADDD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHADDD", 2, Operands { v0, v1 })
    // PHADDD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHADDD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x02)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PHADDD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHADDD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x02)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHADDD")
    }
    return p
}

// PHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation".
//
// Mnemonic : PHADDSW
// Supported forms : (4 forms)
//
// * PHADDSW mm, mm [SSSE3]
// * PHADDSW m64, mm [SSSE3]
// * PHADDSW xmm, xmm [SSSE3]
// * PHADDSW m128, xmm [SSSE3]
//
func (self *Program) PHADDSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHADDSW", 2, Operands { v0, v1 })
    // PHADDSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHADDSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x03)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PHADDSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHADDSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x03)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHADDSW")
    }
    return p
}

// PHADDW performs "Packed Horizontal Add Word Integers".
//
// Mnemonic : PHADDW
// Supported forms : (4 forms)
//
// * PHADDW mm, mm [SSSE3]
// * PHADDW m64, mm [SSSE3]
// * PHADDW xmm, xmm [SSSE3]
// * PHADDW m128, xmm [SSSE3]
//
func (self *Program) PHADDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHADDW", 2, Operands { v0, v1 })
    // PHADDW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHADDW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x01)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PHADDW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHADDW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x01)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHADDW")
    }
    return p
}

// PHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers".
//
// Mnemonic : PHMINPOSUW
// Supported forms : (2 forms)
//
// * PHMINPOSUW xmm, xmm [SSE4.1]
// * PHMINPOSUW m128, xmm [SSE4.1]
//
func (self *Program) PHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHMINPOSUW", 2, Operands { v0, v1 })
    // PHMINPOSUW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHMINPOSUW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x41)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHMINPOSUW")
    }
    return p
}

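// Result layout note (per the Intel SDM): PHMINPOSUW writes the minimum
// unsigned word of the source into bits 15:0 of the destination, the index of
// that word into bits 18:16, and zeroes the remaining destination bits.
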
// PHSUBD performs "Packed Horizontal Subtract Doubleword Integers".
//
// Mnemonic : PHSUBD
// Supported forms : (4 forms)
//
// * PHSUBD mm, mm [SSSE3]
// * PHSUBD m64, mm [SSSE3]
// * PHSUBD xmm, xmm [SSSE3]
// * PHSUBD m128, xmm [SSSE3]
//
func (self *Program) PHSUBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHSUBD", 2, Operands { v0, v1 })
    // PHSUBD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x06)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHSUBD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x06)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PHSUBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x06)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHSUBD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x06)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHSUBD")
    }
    return p
}

// PHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation".
//
// Mnemonic : PHSUBSW
// Supported forms : (4 forms)
//
// * PHSUBSW mm, mm [SSSE3]
// * PHSUBSW m64, mm [SSSE3]
// * PHSUBSW xmm, xmm [SSSE3]
// * PHSUBSW m128, xmm [SSSE3]
//
func (self *Program) PHSUBSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHSUBSW", 2, Operands { v0, v1 })
    // PHSUBSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x07)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHSUBSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x07)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PHSUBSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x07)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHSUBSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x07)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHSUBSW")
    }
    return p
}

// PHSUBW performs "Packed Horizontal Subtract Word Integers".
//
// Mnemonic : PHSUBW
// Supported forms : (4 forms)
//
// * PHSUBW mm, mm [SSSE3]
// * PHSUBW m64, mm [SSSE3]
// * PHSUBW xmm, xmm [SSSE3]
// * PHSUBW m128, xmm [SSSE3]
//
func (self *Program) PHSUBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PHSUBW", 2, Operands { v0, v1 })
    // PHSUBW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHSUBW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x05)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PHSUBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PHSUBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x05)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PHSUBW")
    }
    return p
}

// PI2FD performs "Packed Integer to Floating-Point Doubleword Conversion".
//
// Mnemonic : PI2FD
// Supported forms : (2 forms)
//
// * PI2FD mm, mm [3dnow!]
// * PI2FD m64, mm [3dnow!]
//
func (self *Program) PI2FD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PI2FD", 2, Operands { v0, v1 })
    // PI2FD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x0d)
        })
    }
    // PI2FD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x0d)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PI2FD")
    }
    return p
}

// PI2FW performs "Packed Integer to Floating-Point Word Conversion".
//
// Mnemonic : PI2FW
// Supported forms : (2 forms)
//
// * PI2FW mm, mm [3dnow!+]
// * PI2FW m64, mm [3dnow!+]
//
func (self *Program) PI2FW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PI2FW", 2, Operands { v0, v1 })
    // PI2FW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0x0c)
        })
    }
    // PI2FW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0x0c)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PI2FW")
    }
    return p
}

// PINSRB performs "Insert Byte".
|
|
//
|
|
// Mnemonic : PINSRB
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * PINSRB imm8, r32, xmm [SSE4.1]
|
|
// * PINSRB imm8, m8, xmm [SSE4.1]
|
|
//
|
|
func (self *Program) PINSRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("PINSRB", 3, Operands { v0, v1, v2 })
|
|
// PINSRB imm8, r32, xmm
|
|
if isImm8(v0) && isReg32(v1) && isXMM(v2) {
|
|
self.require(ISA_SSE4_1)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[2]), v[1], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x3a)
|
|
m.emit(0x20)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// PINSRB imm8, m8, xmm
|
|
if isImm8(v0) && isM8(v1) && isXMM(v2) {
|
|
self.require(ISA_SSE4_1)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[2]), addr(v[1]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x3a)
|
|
m.emit(0x20)
|
|
m.mrsd(lcode(v[2]), addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for PINSRB")
|
|
}
|
|
return p
|
|
}

// PINSRD performs "Insert Doubleword".
//
// Mnemonic : PINSRD
// Supported forms : (2 forms)
//
// * PINSRD imm8, r32, xmm [SSE4.1]
// * PINSRD imm8, m32, xmm [SSE4.1]
//
func (self *Program) PINSRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PINSRD", 3, Operands { v0, v1, v2 })
    // PINSRD imm8, r32, xmm
    if isImm8(v0) && isReg32(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PINSRD imm8, m32, xmm
    if isImm8(v0) && isM32(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x22)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PINSRD")
    }
    return p
}

// PINSRQ performs "Insert Quadword".
//
// Mnemonic : PINSRQ
// Supported forms : (2 forms)
//
// * PINSRQ imm8, r64, xmm [SSE4.1]
// * PINSRQ imm8, m64, xmm [SSE4.1]
//
func (self *Program) PINSRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PINSRQ", 3, Operands { v0, v1, v2 })
    // PINSRQ imm8, r64, xmm
    if isImm8(v0) && isReg64(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PINSRQ imm8, m64, xmm
    if isImm8(v0) && isM64(v1) && isXMM(v2) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexm(1, hcode(v[2]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0x22)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PINSRQ")
    }
    return p
}
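
// Note: the register form of PINSRQ hard-codes the mandatory REX.W prefix
// (0x48 | ...) because the source is a 64-bit general-purpose register; the
// memory form gets REX.W through m.rexm(1, ...). Hand-derived example
// (illustrative, not generated output):
//
//     PINSRQ $1, rax, xmm0    =>    66 48 0f 3a 22 c0 01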

// PINSRW performs "Insert Word".
//
// Mnemonic : PINSRW
// Supported forms : (4 forms)
//
// * PINSRW imm8, r32, mm [MMX+]
// * PINSRW imm8, m16, mm [MMX+]
// * PINSRW imm8, r32, xmm [SSE2]
// * PINSRW imm8, m16, xmm [SSE2]
//
func (self *Program) PINSRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PINSRW", 3, Operands { v0, v1, v2 })
    // PINSRW imm8, r32, mm
    if isImm8(v0) && isReg32(v1) && isMM(v2) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PINSRW imm8, m16, mm
    if isImm8(v0) && isM16(v1) && isMM(v2) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc4)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // PINSRW imm8, r32, xmm
    if isImm8(v0) && isReg32(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PINSRW imm8, m16, xmm
    if isImm8(v0) && isM16(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc4)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PINSRW")
    }
    return p
}
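
// Note: the MMX+ and SSE2 forms of PINSRW share the 0x0f 0xc4 opcode; the
// 0x66 operand-size prefix is the only encoding difference and selects the
// XMM register file. Hand-derived examples (illustrative, not generated
// output):
//
//     PINSRW $0, eax, mm0     =>    0f c4 c0 00
//     PINSRW $0, eax, xmm0    =>    66 0f c4 c0 00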

// PMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers".
//
// Mnemonic : PMADDUBSW
// Supported forms : (4 forms)
//
// * PMADDUBSW mm, mm [SSSE3]
// * PMADDUBSW m64, mm [SSSE3]
// * PMADDUBSW xmm, xmm [SSSE3]
// * PMADDUBSW m128, xmm [SSSE3]
//
func (self *Program) PMADDUBSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMADDUBSW", 2, Operands { v0, v1 })
    // PMADDUBSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMADDUBSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x04)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMADDUBSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMADDUBSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x04)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMADDUBSW")
    }
    return p
}

// PMADDWD performs "Multiply and Add Packed Signed Word Integers".
//
// Mnemonic : PMADDWD
// Supported forms : (4 forms)
//
// * PMADDWD mm, mm [MMX]
// * PMADDWD m64, mm [MMX]
// * PMADDWD xmm, xmm [SSE2]
// * PMADDWD m128, xmm [SSE2]
//
func (self *Program) PMADDWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMADDWD", 2, Operands { v0, v1 })
    // PMADDWD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMADDWD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf5)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMADDWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMADDWD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf5)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMADDWD")
    }
    return p
}
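
// A minimal usage sketch for the two-operand packed-math builders above,
// assuming an already-constructed *Program p (the setup is hypothetical and
// shown for illustration only):
//
//     p.PMADDWD(XMM0, XMM1)      // xmm form, requires SSE2
//     p.PMADDUBSW(XMM0, XMM1)    // SSSE3 unsigned-by-signed byte variant
//
// An operand combination that matches none of the forms falls through to
// the panic("invalid operands for ...") guard at the end of each builder.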

// PMAXSB performs "Maximum of Packed Signed Byte Integers".
//
// Mnemonic : PMAXSB
// Supported forms : (2 forms)
//
// * PMAXSB xmm, xmm [SSE4.1]
// * PMAXSB m128, xmm [SSE4.1]
//
func (self *Program) PMAXSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMAXSB", 2, Operands { v0, v1 })
    // PMAXSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMAXSB")
    }
    return p
}

// PMAXSD performs "Maximum of Packed Signed Doubleword Integers".
//
// Mnemonic : PMAXSD
// Supported forms : (2 forms)
//
// * PMAXSD xmm, xmm [SSE4.1]
// * PMAXSD m128, xmm [SSE4.1]
//
func (self *Program) PMAXSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMAXSD", 2, Operands { v0, v1 })
    // PMAXSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXSD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMAXSD")
    }
    return p
}

// PMAXSW performs "Maximum of Packed Signed Word Integers".
//
// Mnemonic : PMAXSW
// Supported forms : (4 forms)
//
// * PMAXSW mm, mm [MMX+]
// * PMAXSW m64, mm [MMX+]
// * PMAXSW xmm, xmm [SSE2]
// * PMAXSW m128, xmm [SSE2]
//
func (self *Program) PMAXSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMAXSW", 2, Operands { v0, v1 })
    // PMAXSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xee)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMAXSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xee)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMAXSW")
    }
    return p
}

// PMAXUB performs "Maximum of Packed Unsigned Byte Integers".
//
// Mnemonic : PMAXUB
// Supported forms : (4 forms)
//
// * PMAXUB mm, mm [MMX+]
// * PMAXUB m64, mm [MMX+]
// * PMAXUB xmm, xmm [SSE2]
// * PMAXUB m128, xmm [SSE2]
//
func (self *Program) PMAXUB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMAXUB", 2, Operands { v0, v1 })
    // PMAXUB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXUB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xde)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMAXUB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXUB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xde)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMAXUB")
    }
    return p
}

// PMAXUD performs "Maximum of Packed Unsigned Doubleword Integers".
//
// Mnemonic : PMAXUD
// Supported forms : (2 forms)
//
// * PMAXUD xmm, xmm [SSE4.1]
// * PMAXUD m128, xmm [SSE4.1]
//
func (self *Program) PMAXUD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMAXUD", 2, Operands { v0, v1 })
    // PMAXUD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXUD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMAXUD")
    }
    return p
}

// PMAXUW performs "Maximum of Packed Unsigned Word Integers".
//
// Mnemonic : PMAXUW
// Supported forms : (2 forms)
//
// * PMAXUW xmm, xmm [SSE4.1]
// * PMAXUW m128, xmm [SSE4.1]
//
func (self *Program) PMAXUW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMAXUW", 2, Operands { v0, v1 })
    // PMAXUW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMAXUW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMAXUW")
    }
    return p
}

// PMINSB performs "Minimum of Packed Signed Byte Integers".
//
// Mnemonic : PMINSB
// Supported forms : (2 forms)
//
// * PMINSB xmm, xmm [SSE4.1]
// * PMINSB m128, xmm [SSE4.1]
//
func (self *Program) PMINSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMINSB", 2, Operands { v0, v1 })
    // PMINSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x38)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMINSB")
    }
    return p
}

// PMINSD performs "Minimum of Packed Signed Doubleword Integers".
//
// Mnemonic : PMINSD
// Supported forms : (2 forms)
//
// * PMINSD xmm, xmm [SSE4.1]
// * PMINSD m128, xmm [SSE4.1]
//
func (self *Program) PMINSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMINSD", 2, Operands { v0, v1 })
    // PMINSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINSD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x39)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMINSD")
    }
    return p
}

// PMINSW performs "Minimum of Packed Signed Word Integers".
//
// Mnemonic : PMINSW
// Supported forms : (4 forms)
//
// * PMINSW mm, mm [MMX+]
// * PMINSW m64, mm [MMX+]
// * PMINSW xmm, xmm [SSE2]
// * PMINSW m128, xmm [SSE2]
//
func (self *Program) PMINSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMINSW", 2, Operands { v0, v1 })
    // PMINSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xea)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMINSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xea)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMINSW")
    }
    return p
}

// PMINUB performs "Minimum of Packed Unsigned Byte Integers".
//
// Mnemonic : PMINUB
// Supported forms : (4 forms)
//
// * PMINUB mm, mm [MMX+]
// * PMINUB m64, mm [MMX+]
// * PMINUB xmm, xmm [SSE2]
// * PMINUB m128, xmm [SSE2]
//
func (self *Program) PMINUB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMINUB", 2, Operands { v0, v1 })
    // PMINUB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINUB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xda)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMINUB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINUB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xda)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMINUB")
    }
    return p
}

// PMINUD performs "Minimum of Packed Unsigned Doubleword Integers".
//
// Mnemonic : PMINUD
// Supported forms : (2 forms)
//
// * PMINUD xmm, xmm [SSE4.1]
// * PMINUD m128, xmm [SSE4.1]
//
func (self *Program) PMINUD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMINUD", 2, Operands { v0, v1 })
    // PMINUD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINUD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMINUD")
    }
    return p
}

// PMINUW performs "Minimum of Packed Unsigned Word Integers".
//
// Mnemonic : PMINUW
// Supported forms : (2 forms)
//
// * PMINUW xmm, xmm [SSE4.1]
// * PMINUW m128, xmm [SSE4.1]
//
func (self *Program) PMINUW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMINUW", 2, Operands { v0, v1 })
    // PMINUW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMINUW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x3a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMINUW")
    }
    return p
}

// PMOVMSKB performs "Move Byte Mask".
//
// Mnemonic : PMOVMSKB
// Supported forms : (2 forms)
//
// * PMOVMSKB mm, r32 [MMX+]
// * PMOVMSKB xmm, r32 [SSE2]
//
func (self *Program) PMOVMSKB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVMSKB", 2, Operands { v0, v1 })
    // PMOVMSKB mm, r32
    if isMM(v0) && isReg32(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVMSKB xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVMSKB")
    }
    return p
}
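
// Hand-derived example for the SSE2 form above (illustrative, not generated
// output): PMOVMSKB xmm0, eax encodes as 66 0f d7 c0 and packs the top bit
// of each of the 16 bytes of xmm0 into the low 16 bits of eax.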

// PMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension".
//
// Mnemonic : PMOVSXBD
// Supported forms : (2 forms)
//
// * PMOVSXBD xmm, xmm [SSE4.1]
// * PMOVSXBD m32, xmm [SSE4.1]
//
func (self *Program) PMOVSXBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVSXBD", 2, Operands { v0, v1 })
    // PMOVSXBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVSXBD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x21)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVSXBD")
    }
    return p
}

// PMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension".
//
// Mnemonic : PMOVSXBQ
// Supported forms : (2 forms)
//
// * PMOVSXBQ xmm, xmm [SSE4.1]
// * PMOVSXBQ m16, xmm [SSE4.1]
//
func (self *Program) PMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVSXBQ", 2, Operands { v0, v1 })
    // PMOVSXBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVSXBQ m16, xmm
    if isM16(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVSXBQ")
    }
    return p
}

// PMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension".
//
// Mnemonic : PMOVSXBW
// Supported forms : (2 forms)
//
// * PMOVSXBW xmm, xmm [SSE4.1]
// * PMOVSXBW m64, xmm [SSE4.1]
//
func (self *Program) PMOVSXBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVSXBW", 2, Operands { v0, v1 })
    // PMOVSXBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVSXBW m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x20)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVSXBW")
    }
    return p
}

// PMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension".
//
// Mnemonic : PMOVSXDQ
// Supported forms : (2 forms)
//
// * PMOVSXDQ xmm, xmm [SSE4.1]
// * PMOVSXDQ m64, xmm [SSE4.1]
//
func (self *Program) PMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVSXDQ", 2, Operands { v0, v1 })
    // PMOVSXDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVSXDQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x25)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVSXDQ")
    }
    return p
}

// PMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension".
//
// Mnemonic : PMOVSXWD
// Supported forms : (2 forms)
//
// * PMOVSXWD xmm, xmm [SSE4.1]
// * PMOVSXWD m64, xmm [SSE4.1]
//
func (self *Program) PMOVSXWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVSXWD", 2, Operands { v0, v1 })
    // PMOVSXWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVSXWD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVSXWD")
    }
    return p
}

// PMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension".
//
// Mnemonic : PMOVSXWQ
// Supported forms : (2 forms)
//
// * PMOVSXWQ xmm, xmm [SSE4.1]
// * PMOVSXWQ m32, xmm [SSE4.1]
//
func (self *Program) PMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVSXWQ", 2, Operands { v0, v1 })
    // PMOVSXWQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVSXWQ m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x24)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVSXWQ")
    }
    return p
}

// PMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension".
//
// Mnemonic : PMOVZXBD
// Supported forms : (2 forms)
//
// * PMOVZXBD xmm, xmm [SSE4.1]
// * PMOVZXBD m32, xmm [SSE4.1]
//
func (self *Program) PMOVZXBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVZXBD", 2, Operands { v0, v1 })
    // PMOVZXBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVZXBD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x31)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVZXBD")
    }
    return p
}

// PMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension".
//
// Mnemonic : PMOVZXBQ
// Supported forms : (2 forms)
//
// * PMOVZXBQ xmm, xmm [SSE4.1]
// * PMOVZXBQ m16, xmm [SSE4.1]
//
func (self *Program) PMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVZXBQ", 2, Operands { v0, v1 })
    // PMOVZXBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVZXBQ m16, xmm
    if isM16(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVZXBQ")
    }
    return p
}

// PMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension".
//
// Mnemonic : PMOVZXBW
// Supported forms : (2 forms)
//
// * PMOVZXBW xmm, xmm [SSE4.1]
// * PMOVZXBW m64, xmm [SSE4.1]
//
func (self *Program) PMOVZXBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVZXBW", 2, Operands { v0, v1 })
    // PMOVZXBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVZXBW m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x30)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVZXBW")
    }
    return p
}

// PMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension".
//
// Mnemonic : PMOVZXDQ
// Supported forms : (2 forms)
//
// * PMOVZXDQ xmm, xmm [SSE4.1]
// * PMOVZXDQ m64, xmm [SSE4.1]
//
func (self *Program) PMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVZXDQ", 2, Operands { v0, v1 })
    // PMOVZXDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVZXDQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x35)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVZXDQ")
    }
    return p
}

// PMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension".
//
// Mnemonic : PMOVZXWD
// Supported forms : (2 forms)
//
// * PMOVZXWD xmm, xmm [SSE4.1]
// * PMOVZXWD m64, xmm [SSE4.1]
//
func (self *Program) PMOVZXWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVZXWD", 2, Operands { v0, v1 })
    // PMOVZXWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVZXWD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVZXWD")
    }
    return p
}

// PMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension".
//
// Mnemonic : PMOVZXWQ
// Supported forms : (2 forms)
//
// * PMOVZXWQ xmm, xmm [SSE4.1]
// * PMOVZXWQ m32, xmm [SSE4.1]
//
func (self *Program) PMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMOVZXWQ", 2, Operands { v0, v1 })
    // PMOVZXWQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMOVZXWQ m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x34)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMOVZXWQ")
    }
    return p
}
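
// Note: the PMOVSX*/PMOVZX* builders above follow a regular layout in the
// 0x0f 0x38 opcode map: the sign-extending forms use 0x20..0x25 and their
// zero-extending twins use 0x30..0x35. The memory operand shrinks with the
// widening factor; PMOVZXBQ, for example, reads only m16, since two bytes
// expand into two quadwords.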

// PMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result".
//
// Mnemonic : PMULDQ
// Supported forms : (2 forms)
//
// * PMULDQ xmm, xmm [SSE4.1]
// * PMULDQ m128, xmm [SSE4.1]
//
func (self *Program) PMULDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULDQ", 2, Operands { v0, v1 })
    // PMULDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULDQ")
    }
    return p
}

// PMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale".
//
// Mnemonic : PMULHRSW
// Supported forms : (4 forms)
//
// * PMULHRSW mm, mm [SSSE3]
// * PMULHRSW m64, mm [SSSE3]
// * PMULHRSW xmm, xmm [SSSE3]
// * PMULHRSW m128, xmm [SSSE3]
//
func (self *Program) PMULHRSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULHRSW", 2, Operands { v0, v1 })
    // PMULHRSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULHRSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMULHRSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULHRSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULHRSW")
    }
    return p
}

// PMULHRW performs "Packed Multiply High Rounded Word".
//
// Mnemonic : PMULHRW
// Supported forms : (2 forms)
//
// * PMULHRW mm, mm [3dnow!]
// * PMULHRW m64, mm [3dnow!]
//
func (self *Program) PMULHRW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULHRW", 2, Operands { v0, v1 })
    // PMULHRW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xb7)
        })
    }
    // PMULHRW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xb7)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULHRW")
    }
    return p
}

// PMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result".
//
// Mnemonic : PMULHUW
// Supported forms : (4 forms)
//
// * PMULHUW mm, mm [MMX+]
// * PMULHUW m64, mm [MMX+]
// * PMULHUW xmm, xmm [SSE2]
// * PMULHUW m128, xmm [SSE2]
//
func (self *Program) PMULHUW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULHUW", 2, Operands { v0, v1 })
    // PMULHUW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULHUW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe4)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMULHUW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULHUW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe4)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULHUW")
    }
    return p
}

// PMULHW performs "Multiply Packed Signed Word Integers and Store High Result".
//
// Mnemonic : PMULHW
// Supported forms : (4 forms)
//
// * PMULHW mm, mm [MMX]
// * PMULHW m64, mm [MMX]
// * PMULHW xmm, xmm [SSE2]
// * PMULHW m128, xmm [SSE2]
//
func (self *Program) PMULHW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULHW", 2, Operands { v0, v1 })
    // PMULHW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULHW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe5)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMULHW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULHW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe5)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULHW")
    }
    return p
}

// PMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result".
//
// Mnemonic : PMULLD
// Supported forms : (2 forms)
//
// * PMULLD xmm, xmm [SSE4.1]
// * PMULLD m128, xmm [SSE4.1]
//
func (self *Program) PMULLD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULLD", 2, Operands { v0, v1 })
    // PMULLD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULLD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x40)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULLD")
    }
    return p
}

// PMULLW performs "Multiply Packed Signed Word Integers and Store Low Result".
//
// Mnemonic : PMULLW
// Supported forms : (4 forms)
//
// * PMULLW mm, mm [MMX]
// * PMULLW m64, mm [MMX]
// * PMULLW xmm, xmm [SSE2]
// * PMULLW m128, xmm [SSE2]
//
func (self *Program) PMULLW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULLW", 2, Operands { v0, v1 })
    // PMULLW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULLW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd5)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMULLW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULLW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd5)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULLW")
    }
    return p
}

// PMULUDQ performs "Multiply Packed Unsigned Doubleword Integers".
//
// Mnemonic : PMULUDQ
// Supported forms : (4 forms)
//
// * PMULUDQ mm, mm [SSE2]
// * PMULUDQ m64, mm [SSE2]
// * PMULUDQ xmm, xmm [SSE2]
// * PMULUDQ m128, xmm [SSE2]
//
func (self *Program) PMULUDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PMULUDQ", 2, Operands { v0, v1 })
    // PMULUDQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULUDQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf4)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PMULUDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PMULUDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf4)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PMULUDQ")
    }
    return p
}

// POPCNTL performs "Count of Number of Bits Set to 1".
//
// Mnemonic : POPCNT
// Supported forms : (2 forms)
//
// * POPCNTL r32, r32 [POPCNT]
// * POPCNTL m32, r32 [POPCNT]
//
func (self *Program) POPCNTL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("POPCNTL", 2, Operands { v0, v1 })
    // POPCNTL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_POPCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // POPCNTL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_POPCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xb8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for POPCNTL")
    }
    return p
}

// POPCNTQ performs "Count of Number of Bits Set to 1".
//
// Mnemonic : POPCNT
// Supported forms : (2 forms)
//
// * POPCNTQ r64, r64 [POPCNT]
// * POPCNTQ m64, r64 [POPCNT]
//
func (self *Program) POPCNTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("POPCNTQ", 2, Operands { v0, v1 })
    // POPCNTQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_POPCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // POPCNTQ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_POPCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xb8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for POPCNTQ")
    }
    return p
}
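
// Hand-derived example for the r64 form above (illustrative, not generated
// output): POPCNTQ rax, rcx encodes as f3 48 0f b8 c8, where 0xf3 is the
// mandatory prefix, 0x48 is REX.W, and 0xc8 = 0xc0 | 1<<3 | 0 places rcx in
// ModRM.reg and rax in ModRM.rm.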

// POPCNTW performs "Count of Number of Bits Set to 1".
//
// Mnemonic : POPCNT
// Supported forms : (2 forms)
//
// * POPCNTW r16, r16 [POPCNT]
// * POPCNTW m16, r16 [POPCNT]
//
func (self *Program) POPCNTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("POPCNTW", 2, Operands { v0, v1 })
    // POPCNTW r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_POPCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // POPCNTW m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_POPCNT)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xb8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for POPCNTW")
    }
    return p
}

// POPQ performs "Pop a Value from the Stack".
//
// Mnemonic : POP
// Supported forms : (2 forms)
//
// * POPQ r64
// * POPQ m64
//
func (self *Program) POPQ(v0 interface{}) *Instruction {
    p := self.alloc("POPQ", 1, Operands { v0 })
    // POPQ r64
    if isReg64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0x58 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0x8f)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // POPQ m64
    if isM64(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x8f)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for POPQ")
    }
    return p
}
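
// Note: POPQ r64 registers two alternative encodings via p.add: the short
// form 0x58+rd and the two-byte form 0x8f /0. Either is architecturally
// valid (pop rax may be emitted as 58 or as 8f c0); registering both lets
// the encoder choose between them.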

// POPW performs "Pop a Value from the Stack".
//
// Mnemonic : POP
// Supported forms : (2 forms)
//
// * POPW r16
// * POPW m16
//
func (self *Program) POPW(v0 interface{}) *Instruction {
    p := self.alloc("POPW", 1, Operands { v0 })
    // POPW r16
    if isReg16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0x58 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0x8f)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // POPW m16
    if isM16(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[0]), false)
            m.emit(0x8f)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for POPW")
    }
    return p
}

// POR performs "Packed Bitwise Logical OR".
//
// Mnemonic : POR
// Supported forms : (4 forms)
//
// * POR mm, mm [MMX]
// * POR m64, mm [MMX]
// * POR xmm, xmm [SSE2]
// * POR m128, xmm [SSE2]
//
func (self *Program) POR(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("POR", 2, Operands { v0, v1 })
    // POR mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // POR m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xeb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // POR xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // POR m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xeb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for POR")
    }
    return p
}

// PREFETCH performs "Prefetch Data into Caches".
//
// Mnemonic : PREFETCH
// Supported forms : (1 form)
//
// * PREFETCH m8 [PREFETCH]
//
func (self *Program) PREFETCH(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCH", 1, Operands { v0 })
    // PREFETCH m8
    if isM8(v0) {
        self.require(ISA_PREFETCH)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0d)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCH")
    }
    return p
}

// PREFETCHNTA performs "Prefetch Data Into Caches using NTA Hint".
//
// Mnemonic : PREFETCHNTA
// Supported forms : (1 form)
//
// * PREFETCHNTA m8 [MMX+]
//
func (self *Program) PREFETCHNTA(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCHNTA", 1, Operands { v0 })
    // PREFETCHNTA m8
    if isM8(v0) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x18)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCHNTA")
    }
    return p
}
|
|
|
|
// PREFETCHT0 performs "Prefetch Data Into Caches using T0 Hint".
//
// Mnemonic : PREFETCHT0
// Supported forms : (1 form)
//
// * PREFETCHT0 m8 [MMX+]
//
func (self *Program) PREFETCHT0(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCHT0", 1, Operands { v0 })
    // PREFETCHT0 m8
    if isM8(v0) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x18)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCHT0")
    }
    return p
}

// PREFETCHT1 performs "Prefetch Data Into Caches using T1 Hint".
//
// Mnemonic : PREFETCHT1
// Supported forms : (1 form)
//
// * PREFETCHT1 m8 [MMX+]
//
func (self *Program) PREFETCHT1(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCHT1", 1, Operands { v0 })
    // PREFETCHT1 m8
    if isM8(v0) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x18)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCHT1")
    }
    return p
}

// PREFETCHT2 performs "Prefetch Data Into Caches using T2 Hint".
//
// Mnemonic : PREFETCHT2
// Supported forms : (1 form)
//
// * PREFETCHT2 m8 [MMX+]
//
func (self *Program) PREFETCHT2(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCHT2", 1, Operands { v0 })
    // PREFETCHT2 m8
    if isM8(v0) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x18)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCHT2")
    }
    return p
}

// PREFETCHW performs "Prefetch Data into Caches in Anticipation of a Write".
//
// Mnemonic : PREFETCHW
// Supported forms : (1 form)
//
// * PREFETCHW m8 [PREFETCHW]
//
func (self *Program) PREFETCHW(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCHW", 1, Operands { v0 })
    // PREFETCHW m8
    if isM8(v0) {
        self.require(ISA_PREFETCHW)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0d)
            m.mrsd(1, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCHW")
    }
    return p
}

// PREFETCHWT1 performs "Prefetch Vector Data Into Caches with Intent to Write and T1 Hint".
//
// Mnemonic : PREFETCHWT1
// Supported forms : (1 form)
//
// * PREFETCHWT1 m8 [PREFETCHWT1]
//
func (self *Program) PREFETCHWT1(v0 interface{}) *Instruction {
    p := self.alloc("PREFETCHWT1", 1, Operands { v0 })
    // PREFETCHWT1 m8
    if isM8(v0) {
        self.require(ISA_PREFETCHWT1)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0d)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PREFETCHWT1")
    }
    return p
}

// PSADBW performs "Compute Sum of Absolute Differences".
//
// Mnemonic : PSADBW
// Supported forms : (4 forms)
//
// * PSADBW mm, mm [MMX+]
// * PSADBW m64, mm [MMX+]
// * PSADBW xmm, xmm [SSE2]
// * PSADBW m128, xmm [SSE2]
//
func (self *Program) PSADBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSADBW", 2, Operands { v0, v1 })
    // PSADBW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSADBW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSADBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSADBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSADBW")
    }
    return p
}

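// Usage sketch (not part of the generated output): assumes a *Program p and
// XMM constants; for each 8-byte group, the sum of absolute byte differences
// lands in that group's low word of the destination:
//
//     p.PSADBW(XMM0, XMM1)    // per group: xmm1.word = sum(|xmm1.byte[i] - xmm0.byte[i]|)
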
// PSHUFB performs "Packed Shuffle Bytes".
//
// Mnemonic : PSHUFB
// Supported forms : (4 forms)
//
// * PSHUFB mm, mm [SSSE3]
// * PSHUFB m64, mm [SSSE3]
// * PSHUFB xmm, xmm [SSSE3]
// * PSHUFB m128, xmm [SSSE3]
//
func (self *Program) PSHUFB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSHUFB", 2, Operands { v0, v1 })
    // PSHUFB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSHUFB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x00)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSHUFB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSHUFB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x00)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSHUFB")
    }
    return p
}

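// Usage sketch (not part of the generated output): assumes a *Program p and
// XMM constants; the first operand is the byte-wise shuffle control, and a
// control byte with bit 7 set zeroes that destination lane:
//
//     p.PSHUFB(XMM0, XMM1)    // xmm1[i] = old xmm1[xmm0[i] & 0x0f], or 0 if xmm0[i] >= 0x80
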
// PSHUFD performs "Shuffle Packed Doublewords".
//
// Mnemonic : PSHUFD
// Supported forms : (2 forms)
//
// * PSHUFD imm8, xmm, xmm [SSE2]
// * PSHUFD imm8, m128, xmm [SSE2]
//
func (self *Program) PSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PSHUFD", 3, Operands { v0, v1, v2 })
    // PSHUFD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSHUFD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSHUFD")
    }
    return p
}

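// Usage sketch (not part of the generated output): assumes a *Program p and
// XMM constants; each 2-bit field of the imm8 selects a source doubleword:
//
//     p.PSHUFD(0x1b, XMM0, XMM1)    // 0x1b = 0b00011011: copy xmm0's dwords into xmm1 reversed
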
// PSHUFHW performs "Shuffle Packed High Words".
//
// Mnemonic : PSHUFHW
// Supported forms : (2 forms)
//
// * PSHUFHW imm8, xmm, xmm [SSE2]
// * PSHUFHW imm8, m128, xmm [SSE2]
//
func (self *Program) PSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PSHUFHW", 3, Operands { v0, v1, v2 })
    // PSHUFHW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSHUFHW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSHUFHW")
    }
    return p
}

// PSHUFLW performs "Shuffle Packed Low Words".
//
// Mnemonic : PSHUFLW
// Supported forms : (2 forms)
//
// * PSHUFLW imm8, xmm, xmm [SSE2]
// * PSHUFLW imm8, m128, xmm [SSE2]
//
func (self *Program) PSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PSHUFLW", 3, Operands { v0, v1, v2 })
    // PSHUFLW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSHUFLW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSHUFLW")
    }
    return p
}

// PSHUFW performs "Shuffle Packed Words".
//
// Mnemonic : PSHUFW
// Supported forms : (2 forms)
//
// * PSHUFW imm8, mm, mm [MMX+]
// * PSHUFW imm8, m64, mm [MMX+]
//
func (self *Program) PSHUFW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("PSHUFW", 3, Operands { v0, v1, v2 })
    // PSHUFW imm8, mm, mm
    if isImm8(v0) && isMM(v1) && isMM(v2) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSHUFW imm8, m64, mm
    if isImm8(v0) && isM64(v1) && isMM(v2) {
        self.require(ISA_MMX_PLUS)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSHUFW")
    }
    return p
}

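// Usage sketch (not part of the generated output): PSHUFW is the MMX+
// analogue of PSHUFD above, selecting 16-bit words; assumes a *Program p and
// the package's MM register constants:
//
//     p.PSHUFW(0x1b, MM0, MM1)    // reverse the four words of mm0 into mm1
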
// PSIGNB performs "Packed Sign of Byte Integers".
//
// Mnemonic : PSIGNB
// Supported forms : (4 forms)
//
// * PSIGNB mm, mm [SSSE3]
// * PSIGNB m64, mm [SSSE3]
// * PSIGNB xmm, xmm [SSSE3]
// * PSIGNB m128, xmm [SSSE3]
//
func (self *Program) PSIGNB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSIGNB", 2, Operands { v0, v1 })
    // PSIGNB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSIGNB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x08)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSIGNB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSIGNB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x08)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSIGNB")
    }
    return p
}

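// Usage sketch for the PSIGN* family (not part of the generated output):
// assumes a *Program p and XMM constants; each destination lane is negated,
// kept, or zeroed according to the sign of the matching source lane:
//
//     p.PSIGNB(XMM0, XMM1)    // per byte: xmm1 = -xmm1, xmm1, or 0 as xmm0 is <0, >0, or ==0
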
// PSIGND performs "Packed Sign of Doubleword Integers".
//
// Mnemonic : PSIGND
// Supported forms : (4 forms)
//
// * PSIGND mm, mm [SSSE3]
// * PSIGND m64, mm [SSSE3]
// * PSIGND xmm, xmm [SSSE3]
// * PSIGND m128, xmm [SSSE3]
//
func (self *Program) PSIGND(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSIGND", 2, Operands { v0, v1 })
    // PSIGND mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSIGND m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSIGND xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSIGND m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x0a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSIGND")
    }
    return p
}

// PSIGNW performs "Packed Sign of Word Integers".
//
// Mnemonic : PSIGNW
// Supported forms : (4 forms)
//
// * PSIGNW mm, mm [SSSE3]
// * PSIGNW m64, mm [SSSE3]
// * PSIGNW xmm, xmm [SSSE3]
// * PSIGNW m128, xmm [SSSE3]
//
func (self *Program) PSIGNW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSIGNW", 2, Operands { v0, v1 })
    // PSIGNW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSIGNW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x09)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSIGNW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSIGNW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSSE3)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x09)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSIGNW")
    }
    return p
}

// PSLLD performs "Shift Packed Doubleword Data Left Logical".
//
// Mnemonic : PSLLD
// Supported forms : (6 forms)
//
// * PSLLD imm8, mm [MMX]
// * PSLLD mm, mm [MMX]
// * PSLLD m64, mm [MMX]
// * PSLLD imm8, xmm [SSE2]
// * PSLLD xmm, xmm [SSE2]
// * PSLLD m128, xmm [SSE2]
//
func (self *Program) PSLLD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSLLD", 2, Operands { v0, v1 })
    // PSLLD imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSLLD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSLLD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSLLD imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSLLD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSLLD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSLLD")
    }
    return p
}

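// Usage sketch (not part of the generated output): assumes a *Program p and
// an XMM constant; the imm8 form shifts every doubleword lane by one count:
//
//     p.PSLLD(4, XMM1)    // each dword in xmm1 <<= 4, zero-filling
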
// PSLLDQ performs "Shift Packed Double Quadword Left Logical".
//
// Mnemonic : PSLLDQ
// Supported forms : (1 form)
//
// * PSLLDQ imm8, xmm [SSE2]
//
func (self *Program) PSLLDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSLLDQ", 2, Operands { v0, v1 })
    // PSLLDQ imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x73)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSLLDQ")
    }
    return p
}

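// Usage sketch (not part of the generated output): unlike the lane-wise PSLL*
// shifts, PSLLDQ shifts the full 128-bit register by whole bytes; assumes a
// *Program p and an XMM constant:
//
//     p.PSLLDQ(1, XMM1)    // shift xmm1 left by one byte (8 bits), zero-filling
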
// PSLLQ performs "Shift Packed Quadword Data Left Logical".
//
// Mnemonic : PSLLQ
// Supported forms : (6 forms)
//
// * PSLLQ imm8, mm [MMX]
// * PSLLQ mm, mm [MMX]
// * PSLLQ m64, mm [MMX]
// * PSLLQ imm8, xmm [SSE2]
// * PSLLQ xmm, xmm [SSE2]
// * PSLLQ m128, xmm [SSE2]
//
func (self *Program) PSLLQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSLLQ", 2, Operands { v0, v1 })
    // PSLLQ imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSLLQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSLLQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSLLQ imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSLLQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSLLQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSLLQ")
    }
    return p
}

// PSLLW performs "Shift Packed Word Data Left Logical".
//
// Mnemonic : PSLLW
// Supported forms : (6 forms)
//
// * PSLLW imm8, mm [MMX]
// * PSLLW mm, mm [MMX]
// * PSLLW m64, mm [MMX]
// * PSLLW imm8, xmm [SSE2]
// * PSLLW xmm, xmm [SSE2]
// * PSLLW m128, xmm [SSE2]
//
func (self *Program) PSLLW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSLLW", 2, Operands { v0, v1 })
    // PSLLW imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSLLW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSLLW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSLLW imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSLLW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSLLW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSLLW")
    }
    return p
}

// PSRAD performs "Shift Packed Doubleword Data Right Arithmetic".
//
// Mnemonic : PSRAD
// Supported forms : (6 forms)
//
// * PSRAD imm8, mm [MMX]
// * PSRAD mm, mm [MMX]
// * PSRAD m64, mm [MMX]
// * PSRAD imm8, xmm [SSE2]
// * PSRAD xmm, xmm [SSE2]
// * PSRAD m128, xmm [SSE2]
//
func (self *Program) PSRAD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSRAD", 2, Operands { v0, v1 })
    // PSRAD imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRAD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRAD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSRAD imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRAD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRAD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSRAD")
    }
    return p
}

// PSRAW performs "Shift Packed Word Data Right Arithmetic".
//
// Mnemonic : PSRAW
// Supported forms : (6 forms)
//
// * PSRAW imm8, mm [MMX]
// * PSRAW mm, mm [MMX]
// * PSRAW m64, mm [MMX]
// * PSRAW imm8, xmm [SSE2]
// * PSRAW xmm, xmm [SSE2]
// * PSRAW m128, xmm [SSE2]
//
func (self *Program) PSRAW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSRAW", 2, Operands { v0, v1 })
    // PSRAW imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x71)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRAW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRAW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSRAW imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x71)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRAW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRAW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSRAW")
    }
    return p
}

// PSRLD performs "Shift Packed Doubleword Data Right Logical".
//
// Mnemonic : PSRLD
// Supported forms : (6 forms)
//
// * PSRLD imm8, mm [MMX]
// * PSRLD mm, mm [MMX]
// * PSRLD m64, mm [MMX]
// * PSRLD imm8, xmm [SSE2]
// * PSRLD xmm, xmm [SSE2]
// * PSRLD m128, xmm [SSE2]
//
func (self *Program) PSRLD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSRLD", 2, Operands { v0, v1 })
    // PSRLD imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRLD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRLD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSRLD imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRLD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRLD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSRLD")
    }
    return p
}

// PSRLDQ performs "Shift Packed Double Quadword Right Logical".
//
// Mnemonic : PSRLDQ
// Supported forms : (1 form)
//
// * PSRLDQ imm8, xmm [SSE2]
//
func (self *Program) PSRLDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSRLDQ", 2, Operands { v0, v1 })
    // PSRLDQ imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x73)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSRLDQ")
    }
    return p
}

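// Usage sketch (not part of the generated output): the byte-granular right
// shift matching PSLLDQ above; assumes a *Program p and an XMM constant:
//
//     p.PSRLDQ(8, XMM1)    // move xmm1's high quadword into its low quadword
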
// PSRLQ performs "Shift Packed Quadword Data Right Logical".
//
// Mnemonic : PSRLQ
// Supported forms : (6 forms)
//
// * PSRLQ imm8, mm [MMX]
// * PSRLQ mm, mm [MMX]
// * PSRLQ m64, mm [MMX]
// * PSRLQ imm8, xmm [SSE2]
// * PSRLQ xmm, xmm [SSE2]
// * PSRLQ m128, xmm [SSE2]
//
func (self *Program) PSRLQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSRLQ", 2, Operands { v0, v1 })
    // PSRLQ imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRLQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRLQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSRLQ imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRLQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRLQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSRLQ")
    }
    return p
}

// PSRLW performs "Shift Packed Word Data Right Logical".
//
// Mnemonic : PSRLW
// Supported forms : (6 forms)
//
// * PSRLW imm8, mm [MMX]
// * PSRLW mm, mm [MMX]
// * PSRLW m64, mm [MMX]
// * PSRLW imm8, xmm [SSE2]
// * PSRLW xmm, xmm [SSE2]
// * PSRLW m128, xmm [SSE2]
//
func (self *Program) PSRLW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSRLW", 2, Operands { v0, v1 })
    // PSRLW imm8, mm
    if isImm8(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRLW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRLW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSRLW imm8, xmm
    if isImm8(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x0f)
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // PSRLW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSRLW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSRLW")
    }
    return p
}

// PSUBB performs "Subtract Packed Byte Integers".
//
// Mnemonic : PSUBB
// Supported forms : (4 forms)
//
// * PSUBB mm, mm [MMX]
// * PSUBB m64, mm [MMX]
// * PSUBB xmm, xmm [SSE2]
// * PSUBB m128, xmm [SSE2]
//
func (self *Program) PSUBB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBB", 2, Operands { v0, v1 })
    // PSUBB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBB")
    }
    return p
}

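// Usage sketch for the wrapping PSUB* subtractions (not part of the generated
// output): assumes a *Program p and XMM constants; note the operand order,
// the first operand is subtracted from the second:
//
//     p.PSUBB(XMM0, XMM1)    // per byte: xmm1 = xmm1 - xmm0, wrapping on overflow
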
// PSUBD performs "Subtract Packed Doubleword Integers".
//
// Mnemonic : PSUBD
// Supported forms : (4 forms)
//
// * PSUBD mm, mm [MMX]
// * PSUBD m64, mm [MMX]
// * PSUBD xmm, xmm [SSE2]
// * PSUBD m128, xmm [SSE2]
//
func (self *Program) PSUBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBD", 2, Operands { v0, v1 })
    // PSUBD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfa)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfa)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBD")
    }
    return p
}

// PSUBQ performs "Subtract Packed Quadword Integers".
//
// Mnemonic : PSUBQ
// Supported forms : (4 forms)
//
// * PSUBQ mm, mm [SSE2]
// * PSUBQ m64, mm [SSE2]
// * PSUBQ xmm, xmm [SSE2]
// * PSUBQ m128, xmm [SSE2]
//
func (self *Program) PSUBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBQ", 2, Operands { v0, v1 })
    // PSUBQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xfb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBQ")
    }
    return p
}

// PSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation".
//
// Mnemonic : PSUBSB
// Supported forms : (4 forms)
//
// * PSUBSB mm, mm [MMX]
// * PSUBSB m64, mm [MMX]
// * PSUBSB xmm, xmm [SSE2]
// * PSUBSB m128, xmm [SSE2]
//
func (self *Program) PSUBSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBSB", 2, Operands { v0, v1 })
    // PSUBSB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBSB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBSB")
    }
    return p
}

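// Usage sketch for the saturating variants (not part of the generated
// output): PSUBSB/PSUBSW clamp to the signed lane range, and the
// PSUBUSB/PSUBUSW pair below clamps to the unsigned range; assumes a
// *Program p and XMM constants:
//
//     p.PSUBSB(XMM0, XMM1)     // per byte: xmm1 = clamp(xmm1 - xmm0, -128, 127)
//     p.PSUBUSB(XMM0, XMM1)    // per byte: xmm1 = max(xmm1 - xmm0, 0)
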
// PSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation".
//
// Mnemonic : PSUBSW
// Supported forms : (4 forms)
//
// * PSUBSW mm, mm [MMX]
// * PSUBSW m64, mm [MMX]
// * PSUBSW xmm, xmm [SSE2]
// * PSUBSW m128, xmm [SSE2]
//
func (self *Program) PSUBSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBSW", 2, Operands { v0, v1 })
    // PSUBSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xe9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBSW")
    }
    return p
}

// PSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation".
//
// Mnemonic : PSUBUSB
// Supported forms : (4 forms)
//
// * PSUBUSB mm, mm [MMX]
// * PSUBUSB m64, mm [MMX]
// * PSUBUSB xmm, xmm [SSE2]
// * PSUBUSB m128, xmm [SSE2]
//
func (self *Program) PSUBUSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBUSB", 2, Operands { v0, v1 })
    // PSUBUSB mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBUSB m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBUSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBUSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBUSB")
    }
    return p
}

// PSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation".
//
// Mnemonic : PSUBUSW
// Supported forms : (4 forms)
//
// * PSUBUSW mm, mm [MMX]
// * PSUBUSW m64, mm [MMX]
// * PSUBUSW xmm, xmm [SSE2]
// * PSUBUSW m128, xmm [SSE2]
//
func (self *Program) PSUBUSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBUSW", 2, Operands { v0, v1 })
    // PSUBUSW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBUSW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBUSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBUSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xd9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBUSW")
    }
    return p
}

// PSUBW performs "Subtract Packed Word Integers".
//
// Mnemonic : PSUBW
// Supported forms : (4 forms)
//
// * PSUBW mm, mm [MMX]
// * PSUBW m64, mm [MMX]
// * PSUBW xmm, xmm [SSE2]
// * PSUBW m128, xmm [SSE2]
//
func (self *Program) PSUBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSUBW", 2, Operands { v0, v1 })
    // PSUBW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PSUBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PSUBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xf9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSUBW")
    }
    return p
}

// PSWAPD performs "Packed Swap Doubleword".
//
// Mnemonic : PSWAPD
// Supported forms : (2 forms)
//
// * PSWAPD mm, mm [3dnow!+]
// * PSWAPD m64, mm [3dnow!+]
//
func (self *Program) PSWAPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PSWAPD", 2, Operands { v0, v1 })
    // PSWAPD mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
            m.emit(0xbb)
        })
    }
    // PSWAPD m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_3DNOW_PLUS)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
            m.emit(0xbb)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PSWAPD")
    }
    return p
}

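// Usage sketch (not part of the generated output): PSWAPD is an AMD 3DNow!+
// instruction, hence DomainAMDSpecific above; assumes a *Program p and MM
// constants:
//
//     p.PSWAPD(MM0, MM1)    // mm1 = mm0 with its two doublewords exchanged
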
// PTEST performs "Packed Logical Compare".
//
// Mnemonic : PTEST
// Supported forms : (2 forms)
//
// * PTEST xmm, xmm [SSE4.1]
// * PTEST m128, xmm [SSE4.1]
//
func (self *Program) PTEST(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PTEST", 2, Operands { v0, v1 })
    // PTEST xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x17)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PTEST m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE4_1)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0x17)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PTEST")
    }
    return p
}

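// Usage sketch (not part of the generated output): PTEST writes only flags:
// ZF is set when the AND of the two operands is all zeroes, CF when the AND
// of the first operand with the complement of the second is, so it usually
// feeds a conditional branch; assumes a *Program p and XMM constants:
//
//     p.PTEST(XMM0, XMM1)    // test xmm1 against mask xmm0; neither register is written
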
// PUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words".
//
// Mnemonic : PUNPCKHBW
// Supported forms : (4 forms)
//
// * PUNPCKHBW mm, mm [MMX]
// * PUNPCKHBW m64, mm [MMX]
// * PUNPCKHBW xmm, xmm [SSE2]
// * PUNPCKHBW m128, xmm [SSE2]
//
func (self *Program) PUNPCKHBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PUNPCKHBW", 2, Operands { v0, v1 })
    // PUNPCKHBW mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PUNPCKHBW m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x68)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PUNPCKHBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PUNPCKHBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x68)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PUNPCKHBW")
    }
    return p
}

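// Usage sketch for the PUNPCKH* family (not part of the generated output):
// these interleave the high halves of the two operands into the destination;
// assumes a *Program p and XMM constants:
//
//     p.PUNPCKHBW(XMM0, XMM1)    // xmm1 = alternate(high 8 bytes of xmm1, high 8 bytes of xmm0)
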
// PUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords".
//
// Mnemonic : PUNPCKHDQ
// Supported forms : (4 forms)
//
// * PUNPCKHDQ mm, mm [MMX]
// * PUNPCKHDQ m64, mm [MMX]
// * PUNPCKHDQ xmm, xmm [SSE2]
// * PUNPCKHDQ m128, xmm [SSE2]
//
func (self *Program) PUNPCKHDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PUNPCKHDQ", 2, Operands { v0, v1 })
    // PUNPCKHDQ mm, mm
    if isMM(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PUNPCKHDQ m64, mm
    if isM64(v0) && isMM(v1) {
        self.require(ISA_MMX)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // PUNPCKHDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PUNPCKHDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PUNPCKHDQ")
    }
    return p
}

// PUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords".
//
// Mnemonic : PUNPCKHQDQ
// Supported forms : (2 forms)
//
// * PUNPCKHQDQ xmm, xmm [SSE2]
// * PUNPCKHQDQ m128, xmm [SSE2]
//
func (self *Program) PUNPCKHQDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("PUNPCKHQDQ", 2, Operands { v0, v1 })
    // PUNPCKHQDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // PUNPCKHQDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x6d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for PUNPCKHQDQ")
    }
    return p
}

// PUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords".
|
|
//
|
|
// Mnemonic : PUNPCKHWD
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * PUNPCKHWD mm, mm [MMX]
|
|
// * PUNPCKHWD m64, mm [MMX]
|
|
// * PUNPCKHWD xmm, xmm [SSE2]
|
|
// * PUNPCKHWD m128, xmm [SSE2]
|
|
//
|
|
func (self *Program) PUNPCKHWD(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("PUNPCKHWD", 2, Operands { v0, v1 })
|
|
// PUNPCKHWD mm, mm
|
|
if isMM(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x69)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKHWD m64, mm
|
|
if isM64(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x69)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// PUNPCKHWD xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x69)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKHWD m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x69)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for PUNPCKHWD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// PUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words".
|
|
//
|
|
// Mnemonic : PUNPCKLBW
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * PUNPCKLBW mm, mm [MMX]
|
|
// * PUNPCKLBW m32, mm [MMX]
|
|
// * PUNPCKLBW xmm, xmm [SSE2]
|
|
// * PUNPCKLBW m128, xmm [SSE2]
|
|
//
|
|
func (self *Program) PUNPCKLBW(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("PUNPCKLBW", 2, Operands { v0, v1 })
|
|
// PUNPCKLBW mm, mm
|
|
if isMM(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x60)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLBW m32, mm
|
|
if isM32(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x60)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// PUNPCKLBW xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x60)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLBW m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x60)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for PUNPCKLBW")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// PUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords".
|
|
//
|
|
// Mnemonic : PUNPCKLDQ
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * PUNPCKLDQ mm, mm [MMX]
|
|
// * PUNPCKLDQ m32, mm [MMX]
|
|
// * PUNPCKLDQ xmm, xmm [SSE2]
|
|
// * PUNPCKLDQ m128, xmm [SSE2]
|
|
//
|
|
func (self *Program) PUNPCKLDQ(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("PUNPCKLDQ", 2, Operands { v0, v1 })
|
|
// PUNPCKLDQ mm, mm
|
|
if isMM(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x62)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLDQ m32, mm
|
|
if isM32(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x62)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// PUNPCKLDQ xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x62)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLDQ m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x62)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for PUNPCKLDQ")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// PUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords".
|
|
//
|
|
// Mnemonic : PUNPCKLQDQ
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * PUNPCKLQDQ xmm, xmm [SSE2]
|
|
// * PUNPCKLQDQ m128, xmm [SSE2]
|
|
//
|
|
func (self *Program) PUNPCKLQDQ(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("PUNPCKLQDQ", 2, Operands { v0, v1 })
|
|
// PUNPCKLQDQ xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x6c)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLQDQ m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x6c)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for PUNPCKLQDQ")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// PUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords".
|
|
//
|
|
// Mnemonic : PUNPCKLWD
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * PUNPCKLWD mm, mm [MMX]
|
|
// * PUNPCKLWD m32, mm [MMX]
|
|
// * PUNPCKLWD xmm, xmm [SSE2]
|
|
// * PUNPCKLWD m128, xmm [SSE2]
|
|
//
|
|
func (self *Program) PUNPCKLWD(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("PUNPCKLWD", 2, Operands { v0, v1 })
|
|
// PUNPCKLWD mm, mm
|
|
if isMM(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x61)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLWD m32, mm
|
|
if isM32(v0) && isMM(v1) {
|
|
self.require(ISA_MMX)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x61)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// PUNPCKLWD xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x61)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// PUNPCKLWD m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE2)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x61)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for PUNPCKLWD")
|
|
}
|
|
return p
|
|
}
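
// A minimal sketch of the low-order counterpart, under the same assumptions
// (a *Program value p and the package's XMM register constants):
//
//	p.PUNPCKLWD(XMM0, XMM1)   // interleave the low-order words of XMM1 and XMM0 into XMM1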

// PUSHQ performs "Push Value Onto the Stack".
//
// Mnemonic : PUSH
// Supported forms : (4 forms)
//
// * PUSHQ imm8
// * PUSHQ imm32
// * PUSHQ r64
// * PUSHQ m64
//
func (self *Program) PUSHQ(v0 interface{}) *Instruction {
	p := self.alloc("PUSHQ", 1, Operands { v0 })
	// PUSHQ imm8
	if isImm8Ext(v0, 8) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x6a)
			m.imm1(toImmAny(v[0]))
		})
	}
	// PUSHQ imm32
	if isImm32Ext(v0, 8) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x68)
			m.imm4(toImmAny(v[0]))
		})
	}
	// PUSHQ r64
	if isReg64(v0) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[0], false)
			m.emit(0x50 | lcode(v[0]))
		})
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[0], false)
			m.emit(0xff)
			m.emit(0xf0 | lcode(v[0]))
		})
	}
	// PUSHQ m64
	if isM64(v0) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[0]), false)
			m.emit(0xff)
			m.mrsd(6, addr(v[0]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for PUSHQ")
	}
	return p
}

// PUSHW performs "Push Value Onto the Stack".
//
// Mnemonic : PUSH
// Supported forms : (2 forms)
//
// * PUSHW r16
// * PUSHW m16
//
func (self *Program) PUSHW(v0 interface{}) *Instruction {
	p := self.alloc("PUSHW", 1, Operands { v0 })
	// PUSHW r16
	if isReg16(v0) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[0], false)
			m.emit(0x50 | lcode(v[0]))
		})
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[0], false)
			m.emit(0xff)
			m.emit(0xf0 | lcode(v[0]))
		})
	}
	// PUSHW m16
	if isM16(v0) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[0]), false)
			m.emit(0xff)
			m.mrsd(6, addr(v[0]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for PUSHW")
	}
	return p
}
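
// A minimal usage sketch, assuming a *Program value p and the package's RAX
// register constant; note that a small constant satisfies both immediate
// predicates above, so both the 0x6a (imm8) and 0x68 (imm32) encodings are
// recorded for it:
//
//	p.PUSHQ(RAX)   // 0x50+rd form (with a REX prefix for R8-R15)
//	p.PUSHQ(8)     // imm8 and imm32 forms both match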

// PXOR performs "Packed Bitwise Logical Exclusive OR".
//
// Mnemonic : PXOR
// Supported forms : (4 forms)
//
// * PXOR mm, mm [MMX]
// * PXOR m64, mm [MMX]
// * PXOR xmm, xmm [SSE2]
// * PXOR m128, xmm [SSE2]
//
func (self *Program) PXOR(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("PXOR", 2, Operands { v0, v1 })
	// PXOR mm, mm
	if isMM(v0) && isMM(v1) {
		self.require(ISA_MMX)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(hcode(v[1]), v[0], false)
			m.emit(0x0f)
			m.emit(0xef)
			m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
		})
	}
	// PXOR m64, mm
	if isM64(v0) && isMM(v1) {
		self.require(ISA_MMX)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(hcode(v[1]), addr(v[0]), false)
			m.emit(0x0f)
			m.emit(0xef)
			m.mrsd(lcode(v[1]), addr(v[0]), 1)
		})
	}
	// PXOR xmm, xmm
	if isXMM(v0) && isXMM(v1) {
		self.require(ISA_SSE2)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[1]), v[0], false)
			m.emit(0x0f)
			m.emit(0xef)
			m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
		})
	}
	// PXOR m128, xmm
	if isM128(v0) && isXMM(v1) {
		self.require(ISA_SSE2)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[1]), addr(v[0]), false)
			m.emit(0x0f)
			m.emit(0xef)
			m.mrsd(lcode(v[1]), addr(v[0]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for PXOR")
	}
	return p
}
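
// A common idiom the register form above enables is zeroing, sketched here
// assuming a *Program value p and the package's XMM0 constant:
//
//	p.PXOR(XMM0, XMM0)   // XMM0 ^= XMM0, i.e. XMM0 = 0, with no memory traffic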

// RCLB performs "Rotate Left through Carry Flag".
//
// Mnemonic : RCL
// Supported forms : (6 forms)
//
// * RCLB 1, r8
// * RCLB imm8, r8
// * RCLB cl, r8
// * RCLB 1, m8
// * RCLB imm8, m8
// * RCLB cl, m8
//
func (self *Program) RCLB(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCLB", 2, Operands { v0, v1 })
	// RCLB 1, r8
	if isConst1(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd0)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLB imm8, r8
	if isImm8(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xc0)
			m.emit(0xd0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLB cl, r8
	if v0 == CL && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd2)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLB 1, m8
	if isConst1(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd0)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	// RCLB imm8, m8
	if isImm8(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc0)
			m.mrsd(2, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLB cl, m8
	if v0 == CL && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd2)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCLB")
	}
	return p
}

// RCLL performs "Rotate Left through Carry Flag".
//
// Mnemonic : RCL
// Supported forms : (6 forms)
//
// * RCLL 1, r32
// * RCLL imm8, r32
// * RCLL cl, r32
// * RCLL 1, m32
// * RCLL imm8, m32
// * RCLL cl, m32
//
func (self *Program) RCLL(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCLL", 2, Operands { v0, v1 })
	// RCLL 1, r32
	if isConst1(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLL imm8, r32
	if isImm8(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xd0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLL cl, r32
	if v0 == CL && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLL 1, m32
	if isConst1(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	// RCLL imm8, m32
	if isImm8(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(2, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLL cl, m32
	if v0 == CL && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCLL")
	}
	return p
}

// RCLQ performs "Rotate Left through Carry Flag".
//
// Mnemonic : RCL
// Supported forms : (6 forms)
//
// * RCLQ 1, r64
// * RCLQ imm8, r64
// * RCLQ cl, r64
// * RCLQ 1, m64
// * RCLQ imm8, m64
// * RCLQ cl, m64
//
func (self *Program) RCLQ(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCLQ", 2, Operands { v0, v1 })
	// RCLQ 1, r64
	if isConst1(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd1)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLQ imm8, r64
	if isImm8(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xc1)
			m.emit(0xd0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLQ cl, r64
	if v0 == CL && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd3)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLQ 1, m64
	if isConst1(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd1)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	// RCLQ imm8, m64
	if isImm8(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xc1)
			m.mrsd(2, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLQ cl, m64
	if v0 == CL && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd3)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCLQ")
	}
	return p
}

// RCLW performs "Rotate Left through Carry Flag".
//
// Mnemonic : RCL
// Supported forms : (6 forms)
//
// * RCLW 1, r16
// * RCLW imm8, r16
// * RCLW cl, r16
// * RCLW 1, m16
// * RCLW imm8, m16
// * RCLW cl, m16
//
func (self *Program) RCLW(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCLW", 2, Operands { v0, v1 })
	// RCLW 1, r16
	if isConst1(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLW imm8, r16
	if isImm8(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xd0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLW cl, r16
	if v0 == CL && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xd0 | lcode(v[1]))
		})
	}
	// RCLW 1, m16
	if isConst1(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	// RCLW imm8, m16
	if isImm8(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(2, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCLW cl, m16
	if v0 == CL && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(2, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCLW")
	}
	return p
}
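
// A minimal usage sketch for the rotate-through-carry family, assuming a
// *Program value p; the CL-count forms are selected by passing the CL
// register constant itself as the first operand:
//
//	p.RCLQ(1, RAX)    // single-bit rotate through CF, 0xd1 /2
//	p.RCLQ(CL, RAX)   // variable count taken from CL, 0xd3 /2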

// RCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : RCPPS
// Supported forms : (2 forms)
//
// * RCPPS xmm, xmm [SSE]
// * RCPPS m128, xmm [SSE]
//
func (self *Program) RCPPS(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCPPS", 2, Operands { v0, v1 })
	// RCPPS xmm, xmm
	if isXMM(v0) && isXMM(v1) {
		self.require(ISA_SSE)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(hcode(v[1]), v[0], false)
			m.emit(0x0f)
			m.emit(0x53)
			m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
		})
	}
	// RCPPS m128, xmm
	if isM128(v0) && isXMM(v1) {
		self.require(ISA_SSE)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(hcode(v[1]), addr(v[0]), false)
			m.emit(0x0f)
			m.emit(0x53)
			m.mrsd(lcode(v[1]), addr(v[0]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCPPS")
	}
	return p
}

// RCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : RCPSS
// Supported forms : (2 forms)
//
// * RCPSS xmm, xmm [SSE]
// * RCPSS m32, xmm [SSE]
//
func (self *Program) RCPSS(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCPSS", 2, Operands { v0, v1 })
	// RCPSS xmm, xmm
	if isXMM(v0) && isXMM(v1) {
		self.require(ISA_SSE)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0xf3)
			m.rexo(hcode(v[1]), v[0], false)
			m.emit(0x0f)
			m.emit(0x53)
			m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
		})
	}
	// RCPSS m32, xmm
	if isM32(v0) && isXMM(v1) {
		self.require(ISA_SSE)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0xf3)
			m.rexo(hcode(v[1]), addr(v[0]), false)
			m.emit(0x0f)
			m.emit(0x53)
			m.mrsd(lcode(v[1]), addr(v[0]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCPSS")
	}
	return p
}
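
// RCPPS/RCPSS return an approximation (relative error at most 1.5 * 2^-12
// per the SSE specification), so callers typically refine the result with a
// Newton-Raphson step; a sketch, assuming a *Program value p:
//
//	p.RCPPS(XMM1, XMM0)   // XMM0 ~= 1/XMM1, per 32-bit lane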

// RCRB performs "Rotate Right through Carry Flag".
//
// Mnemonic : RCR
// Supported forms : (6 forms)
//
// * RCRB 1, r8
// * RCRB imm8, r8
// * RCRB cl, r8
// * RCRB 1, m8
// * RCRB imm8, m8
// * RCRB cl, m8
//
func (self *Program) RCRB(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCRB", 2, Operands { v0, v1 })
	// RCRB 1, r8
	if isConst1(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd0)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRB imm8, r8
	if isImm8(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xc0)
			m.emit(0xd8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRB cl, r8
	if v0 == CL && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd2)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRB 1, m8
	if isConst1(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd0)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	// RCRB imm8, m8
	if isImm8(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc0)
			m.mrsd(3, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRB cl, m8
	if v0 == CL && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd2)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCRB")
	}
	return p
}

// RCRL performs "Rotate Right through Carry Flag".
//
// Mnemonic : RCR
// Supported forms : (6 forms)
//
// * RCRL 1, r32
// * RCRL imm8, r32
// * RCRL cl, r32
// * RCRL 1, m32
// * RCRL imm8, m32
// * RCRL cl, m32
//
func (self *Program) RCRL(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCRL", 2, Operands { v0, v1 })
	// RCRL 1, r32
	if isConst1(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRL imm8, r32
	if isImm8(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xd8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRL cl, r32
	if v0 == CL && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRL 1, m32
	if isConst1(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	// RCRL imm8, m32
	if isImm8(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(3, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRL cl, m32
	if v0 == CL && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCRL")
	}
	return p
}

// RCRQ performs "Rotate Right through Carry Flag".
//
// Mnemonic : RCR
// Supported forms : (6 forms)
//
// * RCRQ 1, r64
// * RCRQ imm8, r64
// * RCRQ cl, r64
// * RCRQ 1, m64
// * RCRQ imm8, m64
// * RCRQ cl, m64
//
func (self *Program) RCRQ(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCRQ", 2, Operands { v0, v1 })
	// RCRQ 1, r64
	if isConst1(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd1)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRQ imm8, r64
	if isImm8(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xc1)
			m.emit(0xd8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRQ cl, r64
	if v0 == CL && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd3)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRQ 1, m64
	if isConst1(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd1)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	// RCRQ imm8, m64
	if isImm8(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xc1)
			m.mrsd(3, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRQ cl, m64
	if v0 == CL && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd3)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCRQ")
	}
	return p
}

// RCRW performs "Rotate Right through Carry Flag".
//
// Mnemonic : RCR
// Supported forms : (6 forms)
//
// * RCRW 1, r16
// * RCRW imm8, r16
// * RCRW cl, r16
// * RCRW 1, m16
// * RCRW imm8, m16
// * RCRW cl, m16
//
func (self *Program) RCRW(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RCRW", 2, Operands { v0, v1 })
	// RCRW 1, r16
	if isConst1(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRW imm8, r16
	if isImm8(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xd8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRW cl, r16
	if v0 == CL && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xd8 | lcode(v[1]))
		})
	}
	// RCRW 1, m16
	if isConst1(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	// RCRW imm8, m16
	if isImm8(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(3, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RCRW cl, m16
	if v0 == CL && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(3, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RCRW")
	}
	return p
}

// RDRAND performs "Read Random Number".
//
// Mnemonic : RDRAND
// Supported forms : (3 forms)
//
// * RDRAND r16 [RDRAND]
// * RDRAND r32 [RDRAND]
// * RDRAND r64 [RDRAND]
//
func (self *Program) RDRAND(v0 interface{}) *Instruction {
	p := self.alloc("RDRAND", 1, Operands { v0 })
	// RDRAND r16
	if isReg16(v0) {
		self.require(ISA_RDRAND)
		p.domain = DomainCrypto
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[0], false)
			m.emit(0x0f)
			m.emit(0xc7)
			m.emit(0xf0 | lcode(v[0]))
		})
	}
	// RDRAND r32
	if isReg32(v0) {
		self.require(ISA_RDRAND)
		p.domain = DomainCrypto
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[0], false)
			m.emit(0x0f)
			m.emit(0xc7)
			m.emit(0xf0 | lcode(v[0]))
		})
	}
	// RDRAND r64
	if isReg64(v0) {
		self.require(ISA_RDRAND)
		p.domain = DomainCrypto
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[0]))
			m.emit(0x0f)
			m.emit(0xc7)
			m.emit(0xf0 | lcode(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for RDRAND")
	}
	return p
}

// RDSEED performs "Read Random SEED".
//
// Mnemonic : RDSEED
// Supported forms : (3 forms)
//
// * RDSEED r16 [RDSEED]
// * RDSEED r32 [RDSEED]
// * RDSEED r64 [RDSEED]
//
func (self *Program) RDSEED(v0 interface{}) *Instruction {
	p := self.alloc("RDSEED", 1, Operands { v0 })
	// RDSEED r16
	if isReg16(v0) {
		self.require(ISA_RDSEED)
		p.domain = DomainCrypto
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[0], false)
			m.emit(0x0f)
			m.emit(0xc7)
			m.emit(0xf8 | lcode(v[0]))
		})
	}
	// RDSEED r32
	if isReg32(v0) {
		self.require(ISA_RDSEED)
		p.domain = DomainCrypto
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[0], false)
			m.emit(0x0f)
			m.emit(0xc7)
			m.emit(0xf8 | lcode(v[0]))
		})
	}
	// RDSEED r64
	if isReg64(v0) {
		self.require(ISA_RDSEED)
		p.domain = DomainCrypto
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[0]))
			m.emit(0x0f)
			m.emit(0xc7)
			m.emit(0xf8 | lcode(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for RDSEED")
	}
	return p
}
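
// Both RDRAND and RDSEED report success through CF, so emitted code must
// test the flag and retry on failure; a sketch, assuming a *Program value p
// and the package's RAX constant:
//
//	p.RDRAND(RAX)   // RAX = hardware random value, CF=1 on success
//	p.RDSEED(RAX)   // RAX = hardware-seeded entropy, CF=1 on success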

// RDTSC performs "Read Time-Stamp Counter".
//
// Mnemonic : RDTSC
// Supported forms : (1 form)
//
// * RDTSC [RDTSC]
//
func (self *Program) RDTSC() *Instruction {
	p := self.alloc("RDTSC", 0, Operands { })
	// RDTSC
	self.require(ISA_RDTSC)
	p.domain = DomainGeneric
	p.add(0, func(m *_Encoding, v []interface{}) {
		m.emit(0x0f)
		m.emit(0x31)
	})
	return p
}

// RDTSCP performs "Read Time-Stamp Counter and Processor ID".
//
// Mnemonic : RDTSCP
// Supported forms : (1 form)
//
// * RDTSCP [RDTSCP]
//
func (self *Program) RDTSCP() *Instruction {
	p := self.alloc("RDTSCP", 0, Operands { })
	// RDTSCP
	self.require(ISA_RDTSCP)
	p.domain = DomainGeneric
	p.add(0, func(m *_Encoding, v []interface{}) {
		m.emit(0x0f)
		m.emit(0x01)
		m.emit(0xf9)
	})
	return p
}
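
// RDTSC returns the 64-bit time-stamp counter split across EDX:EAX
// (high:low), and RDTSCP additionally loads IA32_TSC_AUX into ECX while
// waiting for prior instructions to execute; neither form takes operands:
//
//	p.RDTSC()
//	p.RDTSCP()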

// RET performs "Return from Procedure".
//
// Mnemonic : RET
// Supported forms : (2 forms)
//
// * RET
// * RET imm16
//
func (self *Program) RET(vv ...interface{}) *Instruction {
	var p *Instruction
	switch len(vv) {
		case 0 : p = self.alloc("RET", 0, Operands { })
		case 1 : p = self.alloc("RET", 1, Operands { vv[0] })
		default : panic("instruction RET takes 0 or 1 operands")
	}
	// RET
	if len(vv) == 0 {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0xc3)
		})
	}
	// RET imm16
	if len(vv) == 1 && isImm16(vv[0]) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0xc2)
			m.imm2(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for RET")
	}
	return p
}
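
// RET is variadic so that the near-return and pop-immediate forms share one
// method; a sketch, assuming a *Program value p:
//
//	p.RET()     // plain near return, 0xc3
//	p.RET(16)   // return and pop 16 bytes of stack arguments, 0xc2 iw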

// ROLB performs "Rotate Left".
//
// Mnemonic : ROL
// Supported forms : (6 forms)
//
// * ROLB 1, r8
// * ROLB imm8, r8
// * ROLB cl, r8
// * ROLB 1, m8
// * ROLB imm8, m8
// * ROLB cl, m8
//
func (self *Program) ROLB(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("ROLB", 2, Operands { v0, v1 })
	// ROLB 1, r8
	if isConst1(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd0)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLB imm8, r8
	if isImm8(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xc0)
			m.emit(0xc0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLB cl, r8
	if v0 == CL && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd2)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLB 1, m8
	if isConst1(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd0)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	// ROLB imm8, m8
	if isImm8(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc0)
			m.mrsd(0, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLB cl, m8
	if v0 == CL && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd2)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROLB")
	}
	return p
}

// ROLL performs "Rotate Left".
//
// Mnemonic : ROL
// Supported forms : (6 forms)
//
// * ROLL 1, r32
// * ROLL imm8, r32
// * ROLL cl, r32
// * ROLL 1, m32
// * ROLL imm8, m32
// * ROLL cl, m32
//
func (self *Program) ROLL(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("ROLL", 2, Operands { v0, v1 })
	// ROLL 1, r32
	if isConst1(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLL imm8, r32
	if isImm8(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xc0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLL cl, r32
	if v0 == CL && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLL 1, m32
	if isConst1(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	// ROLL imm8, m32
	if isImm8(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(0, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLL cl, m32
	if v0 == CL && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROLL")
	}
	return p
}

// ROLQ performs "Rotate Left".
//
// Mnemonic : ROL
// Supported forms : (6 forms)
//
// * ROLQ 1, r64
// * ROLQ imm8, r64
// * ROLQ cl, r64
// * ROLQ 1, m64
// * ROLQ imm8, m64
// * ROLQ cl, m64
//
func (self *Program) ROLQ(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("ROLQ", 2, Operands { v0, v1 })
	// ROLQ 1, r64
	if isConst1(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd1)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLQ imm8, r64
	if isImm8(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xc1)
			m.emit(0xc0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLQ cl, r64
	if v0 == CL && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd3)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLQ 1, m64
	if isConst1(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd1)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	// ROLQ imm8, m64
	if isImm8(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xc1)
			m.mrsd(0, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLQ cl, m64
	if v0 == CL && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd3)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROLQ")
	}
	return p
}

// ROLW performs "Rotate Left".
//
// Mnemonic : ROL
// Supported forms : (6 forms)
//
// * ROLW 1, r16
// * ROLW imm8, r16
// * ROLW cl, r16
// * ROLW 1, m16
// * ROLW imm8, m16
// * ROLW cl, m16
//
func (self *Program) ROLW(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("ROLW", 2, Operands { v0, v1 })
	// ROLW 1, r16
	if isConst1(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLW imm8, r16
	if isImm8(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xc0 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLW cl, r16
	if v0 == CL && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xc0 | lcode(v[1]))
		})
	}
	// ROLW 1, m16
	if isConst1(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	// ROLW imm8, m16
	if isImm8(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(0, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROLW cl, m16
	if v0 == CL && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(0, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROLW")
	}
	return p
}

// RORB performs "Rotate Right".
//
// Mnemonic : ROR
// Supported forms : (6 forms)
//
// * RORB 1, r8
// * RORB imm8, r8
// * RORB cl, r8
// * RORB 1, m8
// * RORB imm8, m8
// * RORB cl, m8
//
func (self *Program) RORB(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RORB", 2, Operands { v0, v1 })
	// RORB 1, r8
	if isConst1(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd0)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORB imm8, r8
	if isImm8(v0) && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xc0)
			m.emit(0xc8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORB cl, r8
	if v0 == CL && isReg8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], isReg8REX(v[1]))
			m.emit(0xd2)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORB 1, m8
	if isConst1(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd0)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	// RORB imm8, m8
	if isImm8(v0) && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc0)
			m.mrsd(1, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORB cl, m8
	if v0 == CL && isM8(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd2)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RORB")
	}
	return p
}

// RORL performs "Rotate Right".
//
// Mnemonic : ROR
// Supported forms : (6 forms)
//
// * RORL 1, r32
// * RORL imm8, r32
// * RORL cl, r32
// * RORL 1, m32
// * RORL imm8, m32
// * RORL cl, m32
//
func (self *Program) RORL(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RORL", 2, Operands { v0, v1 })
	// RORL 1, r32
	if isConst1(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORL imm8, r32
	if isImm8(v0) && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xc8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORL cl, r32
	if v0 == CL && isReg32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORL 1, m32
	if isConst1(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	// RORL imm8, m32
	if isImm8(v0) && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(1, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORL cl, m32
	if v0 == CL && isM32(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RORL")
	}
	return p
}

// RORQ performs "Rotate Right".
//
// Mnemonic : ROR
// Supported forms : (6 forms)
//
// * RORQ 1, r64
// * RORQ imm8, r64
// * RORQ cl, r64
// * RORQ 1, m64
// * RORQ imm8, m64
// * RORQ cl, m64
//
func (self *Program) RORQ(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RORQ", 2, Operands { v0, v1 })
	// RORQ 1, r64
	if isConst1(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd1)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORQ imm8, r64
	if isImm8(v0) && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xc1)
			m.emit(0xc8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORQ cl, r64
	if v0 == CL && isReg64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x48 | hcode(v[1]))
			m.emit(0xd3)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORQ 1, m64
	if isConst1(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd1)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	// RORQ imm8, m64
	if isImm8(v0) && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xc1)
			m.mrsd(1, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORQ cl, m64
	if v0 == CL && isM64(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.rexm(1, 0, addr(v[1]))
			m.emit(0xd3)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RORQ")
	}
	return p
}

// RORW performs "Rotate Right".
//
// Mnemonic : ROR
// Supported forms : (6 forms)
//
// * RORW 1, r16
// * RORW imm8, r16
// * RORW cl, r16
// * RORW 1, m16
// * RORW imm8, m16
// * RORW cl, m16
//
func (self *Program) RORW(v0 interface{}, v1 interface{}) *Instruction {
	p := self.alloc("RORW", 2, Operands { v0, v1 })
	// RORW 1, r16
	if isConst1(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd1)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORW imm8, r16
	if isImm8(v0) && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xc1)
			m.emit(0xc8 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORW cl, r16
	if v0 == CL && isReg16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, v[1], false)
			m.emit(0xd3)
			m.emit(0xc8 | lcode(v[1]))
		})
	}
	// RORW 1, m16
	if isConst1(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd1)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	// RORW imm8, m16
	if isImm8(v0) && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xc1)
			m.mrsd(1, addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORW cl, m16
	if v0 == CL && isM16(v1) {
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(0, addr(v[1]), false)
			m.emit(0xd3)
			m.mrsd(1, addr(v[1]), 1)
		})
	}
	if p.len == 0 {
		panic("invalid operands for RORW")
	}
	return p
}
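
// The plain rotates mirror the carry variants above but use the /0 (ROL) and
// /1 (ROR) reg fields; a sketch, assuming a *Program value p and the
// package's EAX/RAX constants:
//
//	p.ROLL(8, EAX)    // rotate EAX left by 8 bits
//	p.RORQ(CL, RAX)   // rotate RAX right by the count in CL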

// RORXL performs "Rotate Right Logical Without Affecting Flags".
//
// Mnemonic : RORX
// Supported forms : (2 forms)
//
// * RORXL imm8, r32, r32 [BMI2]
// * RORXL imm8, m32, r32 [BMI2]
//
func (self *Program) RORXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
	p := self.alloc("RORXL", 3, Operands { v0, v1, v2 })
	// RORXL imm8, r32, r32
	if isImm8(v0) && isReg32(v1) && isReg32(v2) {
		self.require(ISA_BMI2)
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0xc4)
			m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
			m.emit(0x7b)
			m.emit(0xf0)
			m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORXL imm8, m32, r32
	if isImm8(v0) && isM32(v1) && isReg32(v2) {
		self.require(ISA_BMI2)
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.vex3(0xc4, 0b11, 0x03, hcode(v[2]), addr(v[1]), 0)
			m.emit(0xf0)
			m.mrsd(lcode(v[2]), addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for RORXL")
	}
	return p
}

// RORXQ performs "Rotate Right Logical Without Affecting Flags".
//
// Mnemonic : RORX
// Supported forms : (2 forms)
//
// * RORXQ imm8, r64, r64 [BMI2]
// * RORXQ imm8, m64, r64 [BMI2]
//
func (self *Program) RORXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
	p := self.alloc("RORXQ", 3, Operands { v0, v1, v2 })
	// RORXQ imm8, r64, r64
	if isImm8(v0) && isReg64(v1) && isReg64(v2) {
		self.require(ISA_BMI2)
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0xc4)
			m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
			m.emit(0xfb)
			m.emit(0xf0)
			m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// RORXQ imm8, m64, r64
	if isImm8(v0) && isM64(v1) && isReg64(v2) {
		self.require(ISA_BMI2)
		p.domain = DomainGeneric
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.vex3(0xc4, 0b11, 0x83, hcode(v[2]), addr(v[1]), 0)
			m.emit(0xf0)
			m.mrsd(lcode(v[2]), addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for RORXQ")
	}
	return p
}
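
// RORX is the BMI2 rotate: it takes its count as an immediate, writes a
// separate destination register, and leaves EFLAGS untouched, which is why
// it has no 1-count or CL forms; a sketch, assuming a *Program value p:
//
//	p.RORXQ(13, RAX, RBX)   // RBX = RAX rotated right by 13, flags preserved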

// ROUNDPD performs "Round Packed Double Precision Floating-Point Values".
//
// Mnemonic : ROUNDPD
// Supported forms : (2 forms)
//
// * ROUNDPD imm8, xmm, xmm [SSE4.1]
// * ROUNDPD imm8, m128, xmm [SSE4.1]
//
func (self *Program) ROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
	p := self.alloc("ROUNDPD", 3, Operands { v0, v1, v2 })
	// ROUNDPD imm8, xmm, xmm
	if isImm8(v0) && isXMM(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), v[1], false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x09)
			m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROUNDPD imm8, m128, xmm
	if isImm8(v0) && isM128(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), addr(v[1]), false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x09)
			m.mrsd(lcode(v[2]), addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROUNDPD")
	}
	return p
}

// ROUNDPS performs "Round Packed Single Precision Floating-Point Values".
//
// Mnemonic : ROUNDPS
// Supported forms : (2 forms)
//
// * ROUNDPS imm8, xmm, xmm [SSE4.1]
// * ROUNDPS imm8, m128, xmm [SSE4.1]
//
func (self *Program) ROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
	p := self.alloc("ROUNDPS", 3, Operands { v0, v1, v2 })
	// ROUNDPS imm8, xmm, xmm
	if isImm8(v0) && isXMM(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), v[1], false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x08)
			m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROUNDPS imm8, m128, xmm
	if isImm8(v0) && isM128(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), addr(v[1]), false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x08)
			m.mrsd(lcode(v[2]), addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROUNDPS")
	}
	return p
}

// ROUNDSD performs "Round Scalar Double Precision Floating-Point Values".
//
// Mnemonic : ROUNDSD
// Supported forms : (2 forms)
//
// * ROUNDSD imm8, xmm, xmm [SSE4.1]
// * ROUNDSD imm8, m64, xmm [SSE4.1]
//
func (self *Program) ROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
	p := self.alloc("ROUNDSD", 3, Operands { v0, v1, v2 })
	// ROUNDSD imm8, xmm, xmm
	if isImm8(v0) && isXMM(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), v[1], false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x0b)
			m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROUNDSD imm8, m64, xmm
	if isImm8(v0) && isM64(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), addr(v[1]), false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x0b)
			m.mrsd(lcode(v[2]), addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROUNDSD")
	}
	return p
}

// ROUNDSS performs "Round Scalar Single Precision Floating-Point Values".
//
// Mnemonic : ROUNDSS
// Supported forms : (2 forms)
//
// * ROUNDSS imm8, xmm, xmm [SSE4.1]
// * ROUNDSS imm8, m32, xmm [SSE4.1]
//
func (self *Program) ROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
	p := self.alloc("ROUNDSS", 3, Operands { v0, v1, v2 })
	// ROUNDSS imm8, xmm, xmm
	if isImm8(v0) && isXMM(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), v[1], false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x0a)
			m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
			m.imm1(toImmAny(v[0]))
		})
	}
	// ROUNDSS imm8, m32, xmm
	if isImm8(v0) && isM32(v1) && isXMM(v2) {
		self.require(ISA_SSE4_1)
		p.domain = DomainMMXSSE
		p.add(0, func(m *_Encoding, v []interface{}) {
			m.emit(0x66)
			m.rexo(hcode(v[2]), addr(v[1]), false)
			m.emit(0x0f)
			m.emit(0x3a)
			m.emit(0x0a)
			m.mrsd(lcode(v[2]), addr(v[1]), 1)
			m.imm1(toImmAny(v[0]))
		})
	}
	if p.len == 0 {
		panic("invalid operands for ROUNDSS")
	}
	return p
}
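
// For the ROUND* family the imm8 operand is the SSE4.1 rounding control:
// when bit 2 is clear, bits 1:0 select the mode (00 nearest-even, 01 down,
// 10 up, 11 truncate), and bit 3, when set, suppresses precision exceptions;
// a sketch, assuming a *Program value p:
//
//	p.ROUNDSD(0x3, XMM1, XMM0)   // XMM0[63:0] = trunc(XMM1[63:0])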
|
|
|
|
// RSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : RSQRTPS
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * RSQRTPS xmm, xmm [SSE]
|
|
// * RSQRTPS m128, xmm [SSE]
|
|
//
|
|
func (self *Program) RSQRTPS(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("RSQRTPS", 2, Operands { v0, v1 })
|
|
// RSQRTPS xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x0f)
|
|
m.emit(0x52)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// RSQRTPS m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_SSE)
|
|
p.domain = DomainMMXSSE
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x0f)
|
|
m.emit(0x52)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for RSQRTPS")
|
|
}
|
|
return p
|
|
}
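
// RSQRTPS and RSQRTSS return only an approximation of 1/sqrt(x) (relative
// error bounded by 1.5 * 2^-12). When closer to full single precision is
// needed, the usual follow-up is one Newton-Raphson step; a plain-Go sketch
// of the math, not part of the generated encoder (rsqrtEstimate is a
// hypothetical helper standing in for the hardware result):
//
//	y := rsqrtEstimate(x)       // hardware approximation
//	y = y * (1.5 - 0.5*x*y*y)   // one NR step roughly doubles the accurate bits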

// RSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : RSQRTSS
// Supported forms : (2 forms)
//
// * RSQRTSS xmm, xmm [SSE]
// * RSQRTSS m32, xmm [SSE]
//
func (self *Program) RSQRTSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("RSQRTSS", 2, Operands { v0, v1 })
    // RSQRTSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x52)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // RSQRTSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x52)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for RSQRTSS")
    }
    return p
}

// SALB performs "Arithmetic Shift Left".
//
// Mnemonic : SAL
// Supported forms : (6 forms)
//
// * SALB 1, r8
// * SALB imm8, r8
// * SALB cl, r8
// * SALB 1, m8
// * SALB imm8, m8
// * SALB cl, m8
//
func (self *Program) SALB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SALB", 2, Operands { v0, v1 })
    // SALB 1, r8
    if isConst1(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd0)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xc0)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALB cl, r8
    if v0 == CL && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd2)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALB 1, m8
    if isConst1(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd0)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SALB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc0)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALB cl, m8
    if v0 == CL && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd2)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SALB")
    }
    return p
}

// SALL performs "Arithmetic Shift Left".
//
// Mnemonic : SAL
// Supported forms : (6 forms)
//
// * SALL 1, r32
// * SALL imm8, r32
// * SALL cl, r32
// * SALL 1, m32
// * SALL imm8, m32
// * SALL cl, m32
//
func (self *Program) SALL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SALL", 2, Operands { v0, v1 })
    // SALL 1, r32
    if isConst1(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALL cl, r32
    if v0 == CL && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALL 1, m32
    if isConst1(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SALL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALL cl, m32
    if v0 == CL && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SALL")
    }
    return p
}

// SALQ performs "Arithmetic Shift Left".
//
// Mnemonic : SAL
// Supported forms : (6 forms)
//
// * SALQ 1, r64
// * SALQ imm8, r64
// * SALQ cl, r64
// * SALQ 1, m64
// * SALQ imm8, m64
// * SALQ cl, m64
//
func (self *Program) SALQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SALQ", 2, Operands { v0, v1 })
    // SALQ 1, r64
    if isConst1(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd1)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xc1)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALQ cl, r64
    if v0 == CL && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd3)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALQ 1, m64
    if isConst1(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd1)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SALQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xc1)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALQ cl, m64
    if v0 == CL && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd3)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SALQ")
    }
    return p
}
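
// SAL is the same operation as SHL — both use the /4 form of the shift-group
// opcodes emitted above — so the register forms compute, in Go terms:
//
//	r = r << (count & 63) // 64-bit operands; the 8/16/32-bit forms mask with & 31
//
// CF receives the last bit shifted out and zeros fill in from the right.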

// SALW performs "Arithmetic Shift Left".
//
// Mnemonic : SAL
// Supported forms : (6 forms)
//
// * SALW 1, r16
// * SALW imm8, r16
// * SALW cl, r16
// * SALW 1, m16
// * SALW imm8, m16
// * SALW cl, m16
//
func (self *Program) SALW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SALW", 2, Operands { v0, v1 })
    // SALW 1, r16
    if isConst1(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALW cl, r16
    if v0 == CL && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SALW 1, m16
    if isConst1(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SALW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SALW cl, m16
    if v0 == CL && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SALW")
    }
    return p
}

// SARB performs "Arithmetic Shift Right".
//
// Mnemonic : SAR
// Supported forms : (6 forms)
//
// * SARB 1, r8
// * SARB imm8, r8
// * SARB cl, r8
// * SARB 1, m8
// * SARB imm8, m8
// * SARB cl, m8
//
func (self *Program) SARB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SARB", 2, Operands { v0, v1 })
    // SARB 1, r8
    if isConst1(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd0)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xc0)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARB cl, r8
    if v0 == CL && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd2)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARB 1, m8
    if isConst1(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd0)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    // SARB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc0)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARB cl, m8
    if v0 == CL && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd2)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SARB")
    }
    return p
}

// SARL performs "Arithmetic Shift Right".
//
// Mnemonic : SAR
// Supported forms : (6 forms)
//
// * SARL 1, r32
// * SARL imm8, r32
// * SARL cl, r32
// * SARL 1, m32
// * SARL imm8, m32
// * SARL cl, m32
//
func (self *Program) SARL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SARL", 2, Operands { v0, v1 })
    // SARL 1, r32
    if isConst1(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARL cl, r32
    if v0 == CL && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARL 1, m32
    if isConst1(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    // SARL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARL cl, m32
    if v0 == CL && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SARL")
    }
    return p
}

// SARQ performs "Arithmetic Shift Right".
//
// Mnemonic : SAR
// Supported forms : (6 forms)
//
// * SARQ 1, r64
// * SARQ imm8, r64
// * SARQ cl, r64
// * SARQ 1, m64
// * SARQ imm8, m64
// * SARQ cl, m64
//
func (self *Program) SARQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SARQ", 2, Operands { v0, v1 })
    // SARQ 1, r64
    if isConst1(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd1)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xc1)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARQ cl, r64
    if v0 == CL && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd3)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARQ 1, m64
    if isConst1(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd1)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    // SARQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xc1)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARQ cl, m64
    if v0 == CL && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd3)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SARQ")
    }
    return p
}
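
// SAR shifts copies of the sign bit in from the left, matching Go's signed
// shift on the register forms:
//
//	r = uint64(int64(r) >> (count & 63))
//
// e.g. SARQ(63, RAX) yields 0 for a non-negative RAX and all-ones for a
// negative one — a common branch-free sign mask.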

// SARW performs "Arithmetic Shift Right".
//
// Mnemonic : SAR
// Supported forms : (6 forms)
//
// * SARW 1, r16
// * SARW imm8, r16
// * SARW cl, r16
// * SARW 1, m16
// * SARW imm8, m16
// * SARW cl, m16
//
func (self *Program) SARW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SARW", 2, Operands { v0, v1 })
    // SARW 1, r16
    if isConst1(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARW cl, r16
    if v0 == CL && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xf8 | lcode(v[1]))
        })
    }
    // SARW 1, m16
    if isConst1(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    // SARW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(7, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SARW cl, m16
    if v0 == CL && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(7, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SARW")
    }
    return p
}

// SARXL performs "Arithmetic Shift Right Without Affecting Flags".
//
// Mnemonic : SARX
// Supported forms : (2 forms)
//
// * SARXL r32, r32, r32 [BMI2]
// * SARXL r32, m32, r32 [BMI2]
//
func (self *Program) SARXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SARXL", 3, Operands { v0, v1, v2 })
    // SARXL r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7a ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SARXL r32, m32, r32
    if isReg32(v0) && isM32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SARXL")
    }
    return p
}

// SARXQ performs "Arithmetic Shift Right Without Affecting Flags".
//
// Mnemonic : SARX
// Supported forms : (2 forms)
//
// * SARXQ r64, r64, r64 [BMI2]
// * SARXQ r64, m64, r64 [BMI2]
//
func (self *Program) SARXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SARXQ", 3, Operands { v0, v1, v2 })
    // SARXQ r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xfa ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SARXQ r64, m64, r64
    if isReg64(v0) && isM64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SARXQ")
    }
    return p
}
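
// Unlike SAR, the BMI2 SARX forms take the shift count in an arbitrary
// register (the first operand here) and leave RFLAGS untouched, which helps
// scheduling around flag-consuming instructions. A usage sketch in this
// package's (count, source, destination) operand order; the register
// constants are assumed from this package:
//
//	p.SARXQ(RCX, RSI, RAX) // RAX = int64(RSI) >> (RCX & 63), flags preserved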

// SBBB performs "Subtract with Borrow".
//
// Mnemonic : SBB
// Supported forms : (6 forms)
//
// * SBBB imm8, al
// * SBBB imm8, r8
// * SBBB r8, r8
// * SBBB m8, r8
// * SBBB imm8, m8
// * SBBB r8, m8
//
func (self *Program) SBBB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SBBB", 2, Operands { v0, v1 })
    // SBBB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x1c)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x1a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SBBB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x1a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SBBB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(3, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x18)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SBBB")
    }
    return p
}

// SBBL performs "Subtract with Borrow".
//
// Mnemonic : SBB
// Supported forms : (8 forms)
//
// * SBBL imm32, eax
// * SBBL imm8, r32
// * SBBL imm32, r32
// * SBBL r32, r32
// * SBBL m32, r32
// * SBBL imm8, m32
// * SBBL imm32, m32
// * SBBL r32, m32
//
func (self *Program) SBBL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SBBL", 2, Operands { v0, v1 })
    // SBBL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x1d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SBBL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xd8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // SBBL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x1b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SBBL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SBBL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(3, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(3, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SBBL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x19)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SBBL")
    }
    return p
}

// SBBQ performs "Subtract with Borrow".
//
// Mnemonic : SBB
// Supported forms : (8 forms)
//
// * SBBQ imm32, rax
// * SBBQ imm8, r64
// * SBBQ imm32, r64
// * SBBQ r64, r64
// * SBBQ m64, r64
// * SBBQ imm8, m64
// * SBBQ imm32, m64
// * SBBQ r64, m64
//
func (self *Program) SBBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SBBQ", 2, Operands { v0, v1 })
    // SBBQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x1d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SBBQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xd8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // SBBQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x1b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SBBQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SBBQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(3, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(3, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SBBQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x19)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SBBQ")
    }
    return p
}
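
// Because SBB subtracts the source plus CF, chaining a SUB on the low limb
// with SBB on the higher limbs performs multi-precision subtraction. A
// 128-bit sketch; SUBQ is generated elsewhere in this file and the register
// pairing is illustrative:
//
//	p.SUBQ(RCX, RAX) // low 64 bits: RAX -= RCX, CF = borrow out
//	p.SBBQ(RDX, RBX) // high 64 bits: RBX -= RDX + CF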

// SBBW performs "Subtract with Borrow".
//
// Mnemonic : SBB
// Supported forms : (8 forms)
//
// * SBBW imm16, ax
// * SBBW imm8, r16
// * SBBW imm16, r16
// * SBBW r16, r16
// * SBBW m16, r16
// * SBBW imm8, m16
// * SBBW imm16, m16
// * SBBW r16, m16
//
func (self *Program) SBBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SBBW", 2, Operands { v0, v1 })
    // SBBW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x1d)
            m.imm2(toImmAny(v[0]))
        })
    }
    // SBBW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xd8 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // SBBW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x1b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SBBW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SBBW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(3, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SBBW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(3, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // SBBW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x19)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SBBW")
    }
    return p
}

// SETA performs "Set byte if above (CF == 0 and ZF == 0)".
//
// Mnemonic : SETA
// Supported forms : (2 forms)
//
// * SETA r8
// * SETA m8
//
func (self *Program) SETA(v0 interface{}) *Instruction {
    p := self.alloc("SETA", 1, Operands { v0 })
    // SETA r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETA m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x97)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETA")
    }
    return p
}
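
// The SETcc family materializes one flag predicate as a byte: only the low 8
// bits of the destination are written (0 or 1), so the usual idiom clears the
// full register first. A sketch; XORL/CMPQ are this package's other generated
// methods and the register choice is illustrative:
//
//	p.XORL(EAX, EAX) // zero EAX up front — XOR itself clobbers the flags
//	p.CMPQ(RSI, RDI) // any flag-setting comparison
//	p.SETA(AL)       // AL = 1 when the unsigned "above" condition holds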

// SETAE performs "Set byte if above or equal (CF == 0)".
//
// Mnemonic : SETAE
// Supported forms : (2 forms)
//
// * SETAE r8
// * SETAE m8
//
func (self *Program) SETAE(v0 interface{}) *Instruction {
    p := self.alloc("SETAE", 1, Operands { v0 })
    // SETAE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETAE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x93)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETAE")
    }
    return p
}

// SETB performs "Set byte if below (CF == 1)".
//
// Mnemonic : SETB
// Supported forms : (2 forms)
//
// * SETB r8
// * SETB m8
//
func (self *Program) SETB(v0 interface{}) *Instruction {
    p := self.alloc("SETB", 1, Operands { v0 })
    // SETB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x92)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETB")
    }
    return p
}

// SETBE performs "Set byte if below or equal (CF == 1 or ZF == 1)".
//
// Mnemonic : SETBE
// Supported forms : (2 forms)
//
// * SETBE r8
// * SETBE m8
//
func (self *Program) SETBE(v0 interface{}) *Instruction {
    p := self.alloc("SETBE", 1, Operands { v0 })
    // SETBE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x96)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETBE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x96)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETBE")
    }
    return p
}

// SETC performs "Set byte if carry (CF == 1)".
//
// Mnemonic : SETC
// Supported forms : (2 forms)
//
// * SETC r8
// * SETC m8
//
func (self *Program) SETC(v0 interface{}) *Instruction {
    p := self.alloc("SETC", 1, Operands { v0 })
    // SETC r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETC m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x92)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETC")
    }
    return p
}
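
// Note that SETC, SETB and SETNAE are assembler-level aliases: all three emit
// the same 0x0f 0x92 opcode and differ only in how the mnemonic names the CF
// test, just as SETE/SETZ share 0x0f 0x94.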

// SETE performs "Set byte if equal (ZF == 1)".
//
// Mnemonic : SETE
// Supported forms : (2 forms)
//
// * SETE r8
// * SETE m8
//
func (self *Program) SETE(v0 interface{}) *Instruction {
    p := self.alloc("SETE", 1, Operands { v0 })
    // SETE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x94)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x94)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETE")
    }
    return p
}

// SETG performs "Set byte if greater (ZF == 0 and SF == OF)".
//
// Mnemonic : SETG
// Supported forms : (2 forms)
//
// * SETG r8
// * SETG m8
//
func (self *Program) SETG(v0 interface{}) *Instruction {
    p := self.alloc("SETG", 1, Operands { v0 })
    // SETG r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETG m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9f)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETG")
    }
    return p
}

// SETGE performs "Set byte if greater or equal (SF == OF)".
//
// Mnemonic : SETGE
// Supported forms : (2 forms)
//
// * SETGE r8
// * SETGE m8
//
func (self *Program) SETGE(v0 interface{}) *Instruction {
    p := self.alloc("SETGE", 1, Operands { v0 })
    // SETGE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETGE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9d)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETGE")
    }
    return p
}

// SETL performs "Set byte if less (SF != OF)".
//
// Mnemonic : SETL
// Supported forms : (2 forms)
//
// * SETL r8
// * SETL m8
//
func (self *Program) SETL(v0 interface{}) *Instruction {
    p := self.alloc("SETL", 1, Operands { v0 })
    // SETL r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETL m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9c)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETL")
    }
    return p
}

// SETLE performs "Set byte if less or equal (ZF == 1 or SF != OF)".
//
// Mnemonic : SETLE
// Supported forms : (2 forms)
//
// * SETLE r8
// * SETLE m8
//
func (self *Program) SETLE(v0 interface{}) *Instruction {
    p := self.alloc("SETLE", 1, Operands { v0 })
    // SETLE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETLE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9e)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETLE")
    }
    return p
}

// SETNA performs "Set byte if not above (CF == 1 or ZF == 1)".
//
// Mnemonic : SETNA
// Supported forms : (2 forms)
//
// * SETNA r8
// * SETNA m8
//
func (self *Program) SETNA(v0 interface{}) *Instruction {
    p := self.alloc("SETNA", 1, Operands { v0 })
    // SETNA r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x96)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNA m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x96)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNA")
    }
    return p
}

// SETNAE performs "Set byte if not above or equal (CF == 1)".
//
// Mnemonic : SETNAE
// Supported forms : (2 forms)
//
// * SETNAE r8
// * SETNAE m8
//
func (self *Program) SETNAE(v0 interface{}) *Instruction {
    p := self.alloc("SETNAE", 1, Operands { v0 })
    // SETNAE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNAE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x92)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNAE")
    }
    return p
}

// SETNB performs "Set byte if not below (CF == 0)".
//
// Mnemonic : SETNB
// Supported forms : (2 forms)
//
// * SETNB r8
// * SETNB m8
//
func (self *Program) SETNB(v0 interface{}) *Instruction {
    p := self.alloc("SETNB", 1, Operands { v0 })
    // SETNB r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNB m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x93)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNB")
    }
    return p
}

// SETNBE performs "Set byte if not below or equal (CF == 0 and ZF == 0)".
//
// Mnemonic : SETNBE
// Supported forms : (2 forms)
//
// * SETNBE r8
// * SETNBE m8
//
func (self *Program) SETNBE(v0 interface{}) *Instruction {
    p := self.alloc("SETNBE", 1, Operands { v0 })
    // SETNBE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNBE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x97)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNBE")
    }
    return p
}

// SETNC performs "Set byte if not carry (CF == 0)".
//
// Mnemonic : SETNC
// Supported forms : (2 forms)
//
// * SETNC r8
// * SETNC m8
//
func (self *Program) SETNC(v0 interface{}) *Instruction {
    p := self.alloc("SETNC", 1, Operands { v0 })
    // SETNC r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNC m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x93)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNC")
    }
    return p
}

// SETNE performs "Set byte if not equal (ZF == 0)".
//
// Mnemonic : SETNE
// Supported forms : (2 forms)
//
// * SETNE r8
// * SETNE m8
//
func (self *Program) SETNE(v0 interface{}) *Instruction {
    p := self.alloc("SETNE", 1, Operands { v0 })
    // SETNE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x95)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x95)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNE")
    }
    return p
}

// SETNG performs "Set byte if not greater (ZF == 1 or SF != OF)".
//
// Mnemonic : SETNG
// Supported forms : (2 forms)
//
// * SETNG r8
// * SETNG m8
//
func (self *Program) SETNG(v0 interface{}) *Instruction {
    p := self.alloc("SETNG", 1, Operands { v0 })
    // SETNG r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNG m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9e)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNG")
    }
    return p
}

// SETNGE performs "Set byte if not greater or equal (SF != OF)".
//
// Mnemonic : SETNGE
// Supported forms : (2 forms)
//
// * SETNGE r8
// * SETNGE m8
//
func (self *Program) SETNGE(v0 interface{}) *Instruction {
    p := self.alloc("SETNGE", 1, Operands { v0 })
    // SETNGE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNGE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9c)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNGE")
    }
    return p
}

// SETNL performs "Set byte if not less (SF == OF)".
//
// Mnemonic : SETNL
// Supported forms : (2 forms)
//
// * SETNL r8
// * SETNL m8
//
func (self *Program) SETNL(v0 interface{}) *Instruction {
    p := self.alloc("SETNL", 1, Operands { v0 })
    // SETNL r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNL m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9d)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNL")
    }
    return p
}

// SETNLE performs "Set byte if not less or equal (ZF == 0 and SF == OF)".
//
// Mnemonic : SETNLE
// Supported forms : (2 forms)
//
// * SETNLE r8
// * SETNLE m8
//
func (self *Program) SETNLE(v0 interface{}) *Instruction {
    p := self.alloc("SETNLE", 1, Operands { v0 })
    // SETNLE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNLE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9f)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNLE")
    }
    return p
}

// SETNO performs "Set byte if not overflow (OF == 0)".
//
// Mnemonic : SETNO
// Supported forms : (2 forms)
//
// * SETNO r8
// * SETNO m8
//
func (self *Program) SETNO(v0 interface{}) *Instruction {
    p := self.alloc("SETNO", 1, Operands { v0 })
    // SETNO r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x91)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNO m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x91)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNO")
    }
    return p
}

// SETNP performs "Set byte if not parity (PF == 0)".
//
// Mnemonic : SETNP
// Supported forms : (2 forms)
//
// * SETNP r8
// * SETNP m8
//
func (self *Program) SETNP(v0 interface{}) *Instruction {
    p := self.alloc("SETNP", 1, Operands { v0 })
    // SETNP r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNP m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9b)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNP")
    }
    return p
}

// SETNS performs "Set byte if not sign (SF == 0)".
//
// Mnemonic : SETNS
// Supported forms : (2 forms)
//
// * SETNS r8
// * SETNS m8
//
func (self *Program) SETNS(v0 interface{}) *Instruction {
    p := self.alloc("SETNS", 1, Operands { v0 })
    // SETNS r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNS m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x99)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNS")
    }
    return p
}

// SETNZ performs "Set byte if not zero (ZF == 0)".
//
// Mnemonic : SETNZ
// Supported forms : (2 forms)
//
// * SETNZ r8
// * SETNZ m8
//
func (self *Program) SETNZ(v0 interface{}) *Instruction {
    p := self.alloc("SETNZ", 1, Operands { v0 })
    // SETNZ r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x95)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETNZ m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x95)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETNZ")
    }
    return p
}

// SETO performs "Set byte if overflow (OF == 1)".
//
// Mnemonic : SETO
// Supported forms : (2 forms)
//
// * SETO r8
// * SETO m8
//
func (self *Program) SETO(v0 interface{}) *Instruction {
    p := self.alloc("SETO", 1, Operands { v0 })
    // SETO r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETO m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x90)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETO")
    }
    return p
}

// SETP performs "Set byte if parity (PF == 1)".
//
// Mnemonic : SETP
// Supported forms : (2 forms)
//
// * SETP r8
// * SETP m8
//
func (self *Program) SETP(v0 interface{}) *Instruction {
    p := self.alloc("SETP", 1, Operands { v0 })
    // SETP r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETP m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9a)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETP")
    }
    return p
}

// SETPE performs "Set byte if parity even (PF == 1)".
//
// Mnemonic : SETPE
// Supported forms : (2 forms)
//
// * SETPE r8
// * SETPE m8
//
func (self *Program) SETPE(v0 interface{}) *Instruction {
    p := self.alloc("SETPE", 1, Operands { v0 })
    // SETPE r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETPE m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9a)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETPE")
    }
    return p
}

// SETPO performs "Set byte if parity odd (PF == 0)".
//
// Mnemonic : SETPO
// Supported forms : (2 forms)
//
// * SETPO r8
// * SETPO m8
//
func (self *Program) SETPO(v0 interface{}) *Instruction {
    p := self.alloc("SETPO", 1, Operands { v0 })
    // SETPO r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETPO m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x9b)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETPO")
    }
    return p
}

// SETS performs "Set byte if sign (SF == 1)".
//
// Mnemonic : SETS
// Supported forms : (2 forms)
//
// * SETS r8
// * SETS m8
//
func (self *Program) SETS(v0 interface{}) *Instruction {
    p := self.alloc("SETS", 1, Operands { v0 })
    // SETS r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETS m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x98)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETS")
    }
    return p
}

// SETZ performs "Set byte if zero (ZF == 1)".
//
// Mnemonic : SETZ
// Supported forms : (2 forms)
//
// * SETZ r8
// * SETZ m8
//
func (self *Program) SETZ(v0 interface{}) *Instruction {
    p := self.alloc("SETZ", 1, Operands { v0 })
    // SETZ r8
    if isReg8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0x94)
            m.emit(0xc0 | lcode(v[0]))
        })
    }
    // SETZ m8
    if isM8(v0) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x94)
            m.mrsd(0, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SETZ")
    }
    return p
}

// SFENCE performs "Store Fence".
//
// Mnemonic : SFENCE
// Supported forms : (1 form)
//
// * SFENCE [MMX+]
//
func (self *Program) SFENCE() *Instruction {
    p := self.alloc("SFENCE", 0, Operands { })
    // SFENCE
    self.require(ISA_MMX_PLUS)
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0xae)
        m.emit(0xf8)
    })
    return p
}
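
// SFENCE orders stores only: every store older than the fence becomes
// globally visible before any younger store. Ordinary x86 stores are already
// ordered against each other, so SFENCE matters mainly after weakly-ordered
// writes such as non-temporal MOVNT* stores or writes to write-combining
// memory.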

// SHA1MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords".
//
// Mnemonic : SHA1MSG1
// Supported forms : (2 forms)
//
// * SHA1MSG1 xmm, xmm [SHA]
// * SHA1MSG1 m128, xmm [SHA]
//
func (self *Program) SHA1MSG1(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHA1MSG1", 2, Operands { v0, v1 })
    // SHA1MSG1 xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xc9)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SHA1MSG1 m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xc9)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA1MSG1")
    }
    return p
}

// SHA1MSG2 performs "Perform a Final Calculation for the Next Four SHA1 Message Doublewords".
//
// Mnemonic : SHA1MSG2
// Supported forms : (2 forms)
//
// * SHA1MSG2 xmm, xmm [SHA]
// * SHA1MSG2 m128, xmm [SHA]
//
func (self *Program) SHA1MSG2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHA1MSG2", 2, Operands { v0, v1 })
    // SHA1MSG2 xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xca)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SHA1MSG2 m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xca)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA1MSG2")
    }
    return p
}

// SHA1NEXTE performs "Calculate SHA1 State Variable E after Four Rounds".
//
// Mnemonic : SHA1NEXTE
// Supported forms : (2 forms)
//
// * SHA1NEXTE xmm, xmm [SHA]
// * SHA1NEXTE m128, xmm [SHA]
//
func (self *Program) SHA1NEXTE(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHA1NEXTE", 2, Operands { v0, v1 })
    // SHA1NEXTE xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xc8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SHA1NEXTE m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xc8)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA1NEXTE")
    }
    return p
}

// SHA1RNDS4 performs "Perform Four Rounds of SHA1 Operation".
//
// Mnemonic : SHA1RNDS4
// Supported forms : (2 forms)
//
// * SHA1RNDS4 imm8, xmm, xmm [SHA]
// * SHA1RNDS4 imm8, m128, xmm [SHA]
//
func (self *Program) SHA1RNDS4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHA1RNDS4", 3, Operands { v0, v1, v2 })
    // SHA1RNDS4 imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHA1RNDS4 imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x3a)
            m.emit(0xcc)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA1RNDS4")
    }
    return p
}
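
// Encoding sketch (added comment, not generator output): with the package's
// usual XMM symbols, a hypothetical p.SHA1RNDS4(0, XMM1, XMM0) takes the
// register branch above and emits 0x0f 0x3a 0xcc, ModRM
// 0xc0|lcode(XMM0)<<3|lcode(XMM1) = 0xc1, then the immediate, i.e.
// "0f 3a cc c1 00" ("sha1rnds4 xmm0, xmm1, 0").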

// SHA256MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords".
//
// Mnemonic : SHA256MSG1
// Supported forms : (2 forms)
//
// * SHA256MSG1 xmm, xmm [SHA]
// * SHA256MSG1 m128, xmm [SHA]
//
func (self *Program) SHA256MSG1(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHA256MSG1", 2, Operands { v0, v1 })
    // SHA256MSG1 xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SHA256MSG1 m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xcc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA256MSG1")
    }
    return p
}

// SHA256MSG2 performs "Perform a Final Calculation for the Next Four SHA256 Message Doublewords".
//
// Mnemonic : SHA256MSG2
// Supported forms : (2 forms)
//
// * SHA256MSG2 xmm, xmm [SHA]
// * SHA256MSG2 m128, xmm [SHA]
//
func (self *Program) SHA256MSG2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHA256MSG2", 2, Operands { v0, v1 })
    // SHA256MSG2 xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xcd)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SHA256MSG2 m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xcd)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA256MSG2")
    }
    return p
}

// SHA256RNDS2 performs "Perform Two Rounds of SHA256 Operation".
//
// Mnemonic : SHA256RNDS2
// Supported forms : (2 forms)
//
// * SHA256RNDS2 xmm0, xmm, xmm [SHA]
// * SHA256RNDS2 xmm0, m128, xmm [SHA]
//
func (self *Program) SHA256RNDS2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHA256RNDS2", 3, Operands { v0, v1, v2 })
    // SHA256RNDS2 xmm0, xmm, xmm
    if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xcb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SHA256RNDS2 xmm0, m128, xmm
    if v0 == XMM0 && isM128(v1) && isXMM(v2) {
        self.require(ISA_SHA)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0x38)
            m.emit(0xcb)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHA256RNDS2")
    }
    return p
}
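
// Encoding sketch (added comment, not generator output): the first operand is
// pinned to XMM0 (the implicit key/state register of SHA256RNDS2), so a
// hypothetical p.SHA256RNDS2(XMM0, XMM1, XMM2) emits 0x0f 0x38 0xcb and
// ModRM 0xc0|lcode(XMM2)<<3|lcode(XMM1) = 0xd1, i.e. "0f 38 cb d1"
// ("sha256rnds2 xmm2, xmm1, <xmm0>").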

// SHLB performs "Logical Shift Left".
//
// Mnemonic : SHL
// Supported forms : (6 forms)
//
// * SHLB 1, r8
// * SHLB imm8, r8
// * SHLB cl, r8
// * SHLB 1, m8
// * SHLB imm8, m8
// * SHLB cl, m8
//
func (self *Program) SHLB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHLB", 2, Operands { v0, v1 })
    // SHLB 1, r8
    if isConst1(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd0)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xc0)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLB cl, r8
    if v0 == CL && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd2)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLB 1, m8
    if isConst1(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd0)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SHLB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc0)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLB cl, m8
    if v0 == CL && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd2)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLB")
    }
    return p
}
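
// Encoding sketch (added comment, not generator output): a hypothetical
// p.SHLB(CL, AL) takes the "SHLB cl, r8" branch above: no REX is needed for
// AL, so it emits 0xd2 and ModRM 0xe0|lcode(AL) = 0xe0, i.e. "d2 e0"
// ("shl al, cl").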

// SHLDL performs "Integer Double Precision Shift Left".
//
// Mnemonic : SHLD
// Supported forms : (4 forms)
//
// * SHLDL imm8, r32, r32
// * SHLDL cl, r32, r32
// * SHLDL imm8, r32, m32
// * SHLDL cl, r32, m32
//
func (self *Program) SHLDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHLDL", 3, Operands { v0, v1, v2 })
    // SHLDL imm8, r32, r32
    if isImm8(v0) && isReg32(v1) && isReg32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xa4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLDL cl, r32, r32
    if v0 == CL && isReg32(v1) && isReg32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xa5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
        })
    }
    // SHLDL imm8, r32, m32
    if isImm8(v0) && isReg32(v1) && isM32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xa4)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLDL cl, r32, m32
    if v0 == CL && isReg32(v1) && isM32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xa5)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLDL")
    }
    return p
}

// SHLDQ performs "Integer Double Precision Shift Left".
//
// Mnemonic : SHLD
// Supported forms : (4 forms)
//
// * SHLDQ imm8, r64, r64
// * SHLDQ cl, r64, r64
// * SHLDQ imm8, r64, m64
// * SHLDQ cl, r64, m64
//
func (self *Program) SHLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHLDQ", 3, Operands { v0, v1, v2 })
    // SHLDQ imm8, r64, r64
    if isImm8(v0) && isReg64(v1) && isReg64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
            m.emit(0x0f)
            m.emit(0xa4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLDQ cl, r64, r64
    if v0 == CL && isReg64(v1) && isReg64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
            m.emit(0x0f)
            m.emit(0xa5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
        })
    }
    // SHLDQ imm8, r64, m64
    if isImm8(v0) && isReg64(v1) && isM64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[2]))
            m.emit(0x0f)
            m.emit(0xa4)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLDQ cl, r64, m64
    if v0 == CL && isReg64(v1) && isM64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[2]))
            m.emit(0x0f)
            m.emit(0xa5)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLDQ")
    }
    return p
}
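
// Encoding sketch (added comment, not generator output): assuming the usual
// 64-bit register symbols, a hypothetical p.SHLDQ(CL, RDX, RAX) takes the
// "SHLDQ cl, r64, r64" branch: REX.W = 0x48 (both hcodes are zero), then
// 0x0f 0xa5 and ModRM 0xc0|lcode(RDX)<<3|lcode(RAX) = 0xd0, i.e.
// "48 0f a5 d0" ("shld rax, rdx, cl").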

// SHLDW performs "Integer Double Precision Shift Left".
//
// Mnemonic : SHLD
// Supported forms : (4 forms)
//
// * SHLDW imm8, r16, r16
// * SHLDW cl, r16, r16
// * SHLDW imm8, r16, m16
// * SHLDW cl, r16, m16
//
func (self *Program) SHLDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHLDW", 3, Operands { v0, v1, v2 })
    // SHLDW imm8, r16, r16
    if isImm8(v0) && isReg16(v1) && isReg16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xa4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLDW cl, r16, r16
    if v0 == CL && isReg16(v1) && isReg16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xa5)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
        })
    }
    // SHLDW imm8, r16, m16
    if isImm8(v0) && isReg16(v1) && isM16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xa4)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLDW cl, r16, m16
    if v0 == CL && isReg16(v1) && isM16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xa5)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLDW")
    }
    return p
}

// SHLL performs "Logical Shift Left".
//
// Mnemonic : SHL
// Supported forms : (6 forms)
//
// * SHLL 1, r32
// * SHLL imm8, r32
// * SHLL cl, r32
// * SHLL 1, m32
// * SHLL imm8, m32
// * SHLL cl, m32
//
func (self *Program) SHLL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHLL", 2, Operands { v0, v1 })
    // SHLL 1, r32
    if isConst1(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLL cl, r32
    if v0 == CL && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLL 1, m32
    if isConst1(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SHLL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLL cl, m32
    if v0 == CL && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLL")
    }
    return p
}

// SHLQ performs "Logical Shift Left".
//
// Mnemonic : SHL
// Supported forms : (6 forms)
//
// * SHLQ 1, r64
// * SHLQ imm8, r64
// * SHLQ cl, r64
// * SHLQ 1, m64
// * SHLQ imm8, m64
// * SHLQ cl, m64
//
func (self *Program) SHLQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHLQ", 2, Operands { v0, v1 })
    // SHLQ 1, r64
    if isConst1(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd1)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xc1)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLQ cl, r64
    if v0 == CL && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd3)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLQ 1, m64
    if isConst1(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd1)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SHLQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xc1)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLQ cl, m64
    if v0 == CL && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd3)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLQ")
    }
    return p
}
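
// Encoding sketch (added comment, not generator output): assuming the usual
// 64-bit register symbols, a hypothetical p.SHLQ(4, RCX) takes the
// "SHLQ imm8, r64" branch: REX.W = 0x48, opcode 0xc1, ModRM 0xe0|lcode(RCX)
// = 0xe1, then the immediate, i.e. "48 c1 e1 04" ("shl rcx, 4").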

// SHLW performs "Logical Shift Left".
//
// Mnemonic : SHL
// Supported forms : (6 forms)
//
// * SHLW 1, r16
// * SHLW imm8, r16
// * SHLW cl, r16
// * SHLW 1, m16
// * SHLW imm8, m16
// * SHLW cl, m16
//
func (self *Program) SHLW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHLW", 2, Operands { v0, v1 })
    // SHLW 1, r16
    if isConst1(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLW cl, r16
    if v0 == CL && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xe0 | lcode(v[1]))
        })
    }
    // SHLW 1, m16
    if isConst1(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    // SHLW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(4, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHLW cl, m16
    if v0 == CL && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(4, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLW")
    }
    return p
}

// SHLXL performs "Logical Shift Left Without Affecting Flags".
//
// Mnemonic : SHLX
// Supported forms : (2 forms)
//
// * SHLXL r32, r32, r32 [BMI2]
// * SHLXL r32, m32, r32 [BMI2]
//
func (self *Program) SHLXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHLXL", 3, Operands { v0, v1, v2 })
    // SHLXL r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SHLXL r32, m32, r32
    if isReg32(v0) && isM32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLXL")
    }
    return p
}

// SHLXQ performs "Logical Shift Left Without Affecting Flags".
//
// Mnemonic : SHLX
// Supported forms : (2 forms)
//
// * SHLXQ r64, r64, r64 [BMI2]
// * SHLXQ r64, m64, r64 [BMI2]
//
func (self *Program) SHLXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHLXQ", 3, Operands { v0, v1, v2 })
    // SHLXQ r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf9 ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SHLXQ r64, m64, r64
    if isReg64(v0) && isM64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHLXQ")
    }
    return p
}
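
// Encoding sketch (added comment, not generator output): for the VEX-encoded
// register form, a hypothetical p.SHLXQ(RCX, RDX, RAX) emits 0xc4 0xe2 (no
// high registers), then 0xf9 ^ (hlcode(RCX) << 3) = 0xf1 (W=1, vvvv = ~RCX,
// pp = 66), opcode 0xf7 and ModRM 0xc2, i.e. "c4 e2 f1 f7 c2"
// ("shlx rax, rdx, rcx").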

// SHRB performs "Logical Shift Right".
//
// Mnemonic : SHR
// Supported forms : (6 forms)
//
// * SHRB 1, r8
// * SHRB imm8, r8
// * SHRB cl, r8
// * SHRB 1, m8
// * SHRB imm8, m8
// * SHRB cl, m8
//
func (self *Program) SHRB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHRB", 2, Operands { v0, v1 })
    // SHRB 1, r8
    if isConst1(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd0)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xc0)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRB cl, r8
    if v0 == CL && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xd2)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRB 1, m8
    if isConst1(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd0)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    // SHRB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc0)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRB cl, m8
    if v0 == CL && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd2)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRB")
    }
    return p
}
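
// Encoding sketch (added comment, not generator output): isReg8REX forces an
// empty REX prefix for the uniform byte registers, so a hypothetical
// p.SHRB(1, SIL) emits 0x40, then 0xd0 and ModRM 0xe8|lcode(SIL) = 0xee,
// i.e. "40 d0 ee" ("shr sil, 1"); without the 0x40, rm=6 would address DH
// instead of SIL.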

// SHRDL performs "Integer Double Precision Shift Right".
//
// Mnemonic : SHRD
// Supported forms : (4 forms)
//
// * SHRDL imm8, r32, r32
// * SHRDL cl, r32, r32
// * SHRDL imm8, r32, m32
// * SHRDL cl, r32, m32
//
func (self *Program) SHRDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHRDL", 3, Operands { v0, v1, v2 })
    // SHRDL imm8, r32, r32
    if isImm8(v0) && isReg32(v1) && isReg32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRDL cl, r32, r32
    if v0 == CL && isReg32(v1) && isReg32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
        })
    }
    // SHRDL imm8, r32, m32
    if isImm8(v0) && isReg32(v1) && isM32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xac)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRDL cl, r32, m32
    if v0 == CL && isReg32(v1) && isM32(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xad)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRDL")
    }
    return p
}

// SHRDQ performs "Integer Double Precision Shift Right".
//
// Mnemonic : SHRD
// Supported forms : (4 forms)
//
// * SHRDQ imm8, r64, r64
// * SHRDQ cl, r64, r64
// * SHRDQ imm8, r64, m64
// * SHRDQ cl, r64, m64
//
func (self *Program) SHRDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHRDQ", 3, Operands { v0, v1, v2 })
    // SHRDQ imm8, r64, r64
    if isImm8(v0) && isReg64(v1) && isReg64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
            m.emit(0x0f)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRDQ cl, r64, r64
    if v0 == CL && isReg64(v1) && isReg64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
            m.emit(0x0f)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
        })
    }
    // SHRDQ imm8, r64, m64
    if isImm8(v0) && isReg64(v1) && isM64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[2]))
            m.emit(0x0f)
            m.emit(0xac)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRDQ cl, r64, m64
    if v0 == CL && isReg64(v1) && isM64(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[2]))
            m.emit(0x0f)
            m.emit(0xad)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRDQ")
    }
    return p
}

// SHRDW performs "Integer Double Precision Shift Right".
//
// Mnemonic : SHRD
// Supported forms : (4 forms)
//
// * SHRDW imm8, r16, r16
// * SHRDW cl, r16, r16
// * SHRDW imm8, r16, m16
// * SHRDW cl, r16, m16
//
func (self *Program) SHRDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHRDW", 3, Operands { v0, v1, v2 })
    // SHRDW imm8, r16, r16
    if isImm8(v0) && isReg16(v1) && isReg16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRDW cl, r16, r16
    if v0 == CL && isReg16(v1) && isReg16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[2], false)
            m.emit(0x0f)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
        })
    }
    // SHRDW imm8, r16, m16
    if isImm8(v0) && isReg16(v1) && isM16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xac)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRDW cl, r16, m16
    if v0 == CL && isReg16(v1) && isM16(v2) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[2]), false)
            m.emit(0x0f)
            m.emit(0xad)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRDW")
    }
    return p
}

// SHRL performs "Logical Shift Right".
//
// Mnemonic : SHR
// Supported forms : (6 forms)
//
// * SHRL 1, r32
// * SHRL imm8, r32
// * SHRL cl, r32
// * SHRL 1, m32
// * SHRL imm8, m32
// * SHRL cl, m32
//
func (self *Program) SHRL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHRL", 2, Operands { v0, v1 })
    // SHRL 1, r32
    if isConst1(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRL imm8, r32
    if isImm8(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRL cl, r32
    if v0 == CL && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRL 1, m32
    if isConst1(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    // SHRL imm8, m32
    if isImm8(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRL cl, m32
    if v0 == CL && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRL")
    }
    return p
}

// SHRQ performs "Logical Shift Right".
//
// Mnemonic : SHR
// Supported forms : (6 forms)
//
// * SHRQ 1, r64
// * SHRQ imm8, r64
// * SHRQ cl, r64
// * SHRQ 1, m64
// * SHRQ imm8, m64
// * SHRQ cl, m64
//
func (self *Program) SHRQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHRQ", 2, Operands { v0, v1 })
    // SHRQ 1, r64
    if isConst1(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd1)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRQ imm8, r64
    if isImm8(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xc1)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRQ cl, r64
    if v0 == CL && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xd3)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRQ 1, m64
    if isConst1(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd1)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    // SHRQ imm8, m64
    if isImm8(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xc1)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRQ cl, m64
    if v0 == CL && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xd3)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRQ")
    }
    return p
}
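
// Encoding sketch (added comment, not generator output): an extended register
// sets the REX.B bit via hcode, so a hypothetical p.SHRQ(1, R9) emits
// 0x48|hcode(R9) = 0x49, then 0xd1 and ModRM 0xe8|lcode(R9) = 0xe9, i.e.
// "49 d1 e9" ("shr r9, 1").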

// SHRW performs "Logical Shift Right".
//
// Mnemonic : SHR
// Supported forms : (6 forms)
//
// * SHRW 1, r16
// * SHRW imm8, r16
// * SHRW cl, r16
// * SHRW 1, m16
// * SHRW imm8, m16
// * SHRW cl, m16
//
func (self *Program) SHRW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SHRW", 2, Operands { v0, v1 })
    // SHRW 1, r16
    if isConst1(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd1)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRW imm8, r16
    if isImm8(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xc1)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRW cl, r16
    if v0 == CL && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xd3)
            m.emit(0xe8 | lcode(v[1]))
        })
    }
    // SHRW 1, m16
    if isConst1(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd1)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    // SHRW imm8, m16
    if isImm8(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xc1)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHRW cl, m16
    if v0 == CL && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xd3)
            m.mrsd(5, addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRW")
    }
    return p
}

// SHRXL performs "Logical Shift Right Without Affecting Flags".
//
// Mnemonic : SHRX
// Supported forms : (2 forms)
//
// * SHRXL r32, r32, r32 [BMI2]
// * SHRXL r32, m32, r32 [BMI2]
//
func (self *Program) SHRXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHRXL", 3, Operands { v0, v1, v2 })
    // SHRXL r32, r32, r32
    if isReg32(v0) && isReg32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7b ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SHRXL r32, m32, r32
    if isReg32(v0) && isM32(v1) && isReg32(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRXL")
    }
    return p
}

// SHRXQ performs "Logical Shift Right Without Affecting Flags".
//
// Mnemonic : SHRX
// Supported forms : (2 forms)
//
// * SHRXQ r64, r64, r64 [BMI2]
// * SHRXQ r64, m64, r64 [BMI2]
//
func (self *Program) SHRXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHRXQ", 3, Operands { v0, v1, v2 })
    // SHRXQ r64, r64, r64
    if isReg64(v0) && isReg64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xfb ^ (hlcode(v[0]) << 3))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // SHRXQ r64, m64, r64
    if isReg64(v0) && isM64(v1) && isReg64(v2) {
        self.require(ISA_BMI2)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0xf7)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHRXQ")
    }
    return p
}

// SHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values".
//
// Mnemonic : SHUFPD
// Supported forms : (2 forms)
//
// * SHUFPD imm8, xmm, xmm [SSE2]
// * SHUFPD imm8, m128, xmm [SSE2]
//
func (self *Program) SHUFPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHUFPD", 3, Operands { v0, v1, v2 })
    // SHUFPD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHUFPD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc6)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHUFPD")
    }
    return p
}
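
// Encoding sketch (added comment, not generator output): a hypothetical
// p.SHUFPD(1, XMM1, XMM0) takes the register branch above: prefix 0x66,
// opcode 0x0f 0xc6, ModRM 0xc0|lcode(XMM0)<<3|lcode(XMM1) = 0xc1, then the
// selector byte, i.e. "66 0f c6 c1 01" ("shufpd xmm0, xmm1, 0x1").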

// SHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values".
//
// Mnemonic : SHUFPS
// Supported forms : (2 forms)
//
// * SHUFPS imm8, xmm, xmm [SSE]
// * SHUFPS imm8, m128, xmm [SSE]
//
func (self *Program) SHUFPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("SHUFPS", 3, Operands { v0, v1, v2 })
    // SHUFPS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SHUFPS imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[2]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc6)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for SHUFPS")
    }
    return p
}

// SQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : SQRTPD
// Supported forms : (2 forms)
//
// * SQRTPD xmm, xmm [SSE2]
// * SQRTPD m128, xmm [SSE2]
//
func (self *Program) SQRTPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SQRTPD", 2, Operands { v0, v1 })
    // SQRTPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SQRTPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SQRTPD")
    }
    return p
}

// SQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : SQRTPS
// Supported forms : (2 forms)
//
// * SQRTPS xmm, xmm [SSE]
// * SQRTPS m128, xmm [SSE]
//
func (self *Program) SQRTPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SQRTPS", 2, Operands { v0, v1 })
    // SQRTPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SQRTPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SQRTPS")
    }
    return p
}

// SQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : SQRTSD
// Supported forms : (2 forms)
//
// * SQRTSD xmm, xmm [SSE2]
// * SQRTSD m64, xmm [SSE2]
//
func (self *Program) SQRTSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SQRTSD", 2, Operands { v0, v1 })
    // SQRTSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SQRTSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SQRTSD")
    }
    return p
}
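
// Encoding sketch (added comment, not generator output): a hypothetical
// p.SQRTSD(XMM3, XMM2) takes the register branch above: mandatory prefix
// 0xf2, opcode 0x0f 0x51, ModRM 0xc0|lcode(XMM2)<<3|lcode(XMM3) = 0xd3,
// i.e. "f2 0f 51 d3" ("sqrtsd xmm2, xmm3").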

// SQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : SQRTSS
// Supported forms : (2 forms)
//
// * SQRTSS xmm, xmm [SSE]
// * SQRTSS m32, xmm [SSE]
//
func (self *Program) SQRTSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SQRTSS", 2, Operands { v0, v1 })
    // SQRTSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SQRTSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SQRTSS")
    }
    return p
}

// STC performs "Set Carry Flag".
//
// Mnemonic : STC
// Supported forms : (1 form)
//
// * STC
//
func (self *Program) STC() *Instruction {
    p := self.alloc("STC", 0, Operands { })
    // STC
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xf9)
    })
    return p
}

// STD performs "Set Direction Flag".
//
// Mnemonic : STD
// Supported forms : (1 form)
//
// * STD
//
func (self *Program) STD() *Instruction {
    p := self.alloc("STD", 0, Operands { })
    // STD
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xfd)
    })
    return p
}

// STMXCSR performs "Store MXCSR Register State".
//
// Mnemonic : STMXCSR
// Supported forms : (1 form)
//
// * STMXCSR m32 [SSE]
//
func (self *Program) STMXCSR(v0 interface{}) *Instruction {
    p := self.alloc("STMXCSR", 1, Operands { v0 })
    // STMXCSR m32
    if isM32(v0) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xae)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for STMXCSR")
    }
    return p
}

// SUBB performs "Subtract".
//
// Mnemonic : SUB
// Supported forms : (6 forms)
//
// * SUBB imm8, al
// * SUBB imm8, r8
// * SUBB r8, r8
// * SUBB m8, r8
// * SUBB imm8, m8
// * SUBB r8, m8
//
func (self *Program) SUBB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBB", 2, Operands { v0, v1 })
    // SUBB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x2c)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SUBB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x28)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBB")
    }
    return p
}

// SUBL performs "Subtract".
//
// Mnemonic : SUB
// Supported forms : (8 forms)
//
// * SUBL imm32, eax
// * SUBL imm8, r32
// * SUBL imm32, r32
// * SUBL r32, r32
// * SUBL m32, r32
// * SUBL imm8, m32
// * SUBL imm32, m32
// * SUBL r32, m32
//
func (self *Program) SUBL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBL", 2, Operands { v0, v1 })
    // SUBL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x2d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SUBL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xe8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // SUBL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x2b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SUBL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(5, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SUBL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBL")
    }
    return p
}

// SUBPD performs "Subtract Packed Double-Precision Floating-Point Values".
//
// Mnemonic : SUBPD
// Supported forms : (2 forms)
//
// * SUBPD xmm, xmm [SSE2]
// * SUBPD m128, xmm [SSE2]
//
func (self *Program) SUBPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBPD", 2, Operands { v0, v1 })
    // SUBPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBPD")
    }
    return p
}

// SUBPS performs "Subtract Packed Single-Precision Floating-Point Values".
//
// Mnemonic : SUBPS
// Supported forms : (2 forms)
//
// * SUBPS xmm, xmm [SSE]
// * SUBPS m128, xmm [SSE]
//
func (self *Program) SUBPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBPS", 2, Operands { v0, v1 })
    // SUBPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBPS")
    }
    return p
}

// SUBQ performs "Subtract".
//
// Mnemonic : SUB
// Supported forms : (8 forms)
//
// * SUBQ imm32, rax
// * SUBQ imm8, r64
// * SUBQ imm32, r64
// * SUBQ r64, r64
// * SUBQ m64, r64
// * SUBQ imm8, m64
// * SUBQ imm32, m64
// * SUBQ r64, m64
//
func (self *Program) SUBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBQ", 2, Operands { v0, v1 })
    // SUBQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x2d)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SUBQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xe8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xe8 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // SUBQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x2b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // SUBQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(5, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // SUBQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(5, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // SUBQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBQ")
    }
    return p
}
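
// Encoding sketch (added comment, not generator output): 8 fits in a sign-
// extended imm8, so a hypothetical p.SUBQ(8, RSP) takes the "SUBQ imm8, r64"
// branch: REX.W = 0x48, opcode 0x83, ModRM 0xe8|lcode(RSP) = 0xec, then the
// immediate, i.e. "48 83 ec 08" ("sub rsp, 8").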

// SUBSD performs "Subtract Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : SUBSD
// Supported forms : (2 forms)
//
// * SUBSD xmm, xmm [SSE2]
// * SUBSD m64, xmm [SSE2]
//
func (self *Program) SUBSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBSD", 2, Operands { v0, v1 })
    // SUBSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf2)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBSD")
    }
    return p
}

// SUBSS performs "Subtract Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : SUBSS
// Supported forms : (2 forms)
//
// * SUBSS xmm, xmm [SSE]
// * SUBSS m32, xmm [SSE]
//
func (self *Program) SUBSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("SUBSS", 2, Operands { v0, v1 })
    // SUBSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // SUBSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x5c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for SUBSS")
    }
    return p
}
|
|
|
|
// SUBW performs "Subtract".
|
|
//
|
|
// Mnemonic : SUB
|
|
// Supported forms : (8 forms)
|
|
//
|
|
// * SUBW imm16, ax
|
|
// * SUBW imm8, r16
|
|
// * SUBW imm16, r16
|
|
// * SUBW r16, r16
|
|
// * SUBW m16, r16
|
|
// * SUBW imm8, m16
|
|
// * SUBW imm16, m16
|
|
// * SUBW r16, m16
|
|
//
|
|
func (self *Program) SUBW(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("SUBW", 2, Operands { v0, v1 })
|
|
// SUBW imm16, ax
|
|
if isImm16(v0) && v1 == AX {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.emit(0x2d)
|
|
m.imm2(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// SUBW imm8, r16
|
|
if isImm8Ext(v0, 2) && isReg16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(0, v[1], false)
|
|
m.emit(0x83)
|
|
m.emit(0xe8 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// SUBW imm16, r16
|
|
if isImm16(v0) && isReg16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(0, v[1], false)
|
|
m.emit(0x81)
|
|
m.emit(0xe8 | lcode(v[1]))
|
|
m.imm2(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// SUBW r16, r16
|
|
if isReg16(v0) && isReg16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[0]), v[1], false)
|
|
m.emit(0x29)
|
|
m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), v[0], false)
|
|
m.emit(0x2b)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// SUBW m16, r16
|
|
if isM16(v0) && isReg16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[1]), addr(v[0]), false)
|
|
m.emit(0x2b)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// SUBW imm8, m16
|
|
if isImm8Ext(v0, 2) && isM16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(0, addr(v[1]), false)
|
|
m.emit(0x83)
|
|
m.mrsd(5, addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// SUBW imm16, m16
|
|
if isImm16(v0) && isM16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(0, addr(v[1]), false)
|
|
m.emit(0x81)
|
|
m.mrsd(5, addr(v[1]), 1)
|
|
m.imm2(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// SUBW r16, m16
|
|
if isReg16(v0) && isM16(v1) {
|
|
p.domain = DomainGeneric
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x66)
|
|
m.rexo(hcode(v[0]), addr(v[1]), false)
|
|
m.emit(0x29)
|
|
m.mrsd(lcode(v[0]), addr(v[1]), 1)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for SUBW")
|
|
}
|
|
return p
|
|
}
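
// Note (editorial): every 16-bit form of SUBW above begins with
// m.emit(0x66), the operand-size override prefix that shrinks the default
// 32-bit operand size to 16 bits in long mode; for instance the imm16, ax
// form p.SUBW(1, AX) would emit 66 2d 01 00 ("sub ax, 0x1").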

// SYSCALL performs "Fast System Call".
//
// Mnemonic : SYSCALL
// Supported forms : (1 form)
//
// * SYSCALL
//
func (self *Program) SYSCALL() *Instruction {
    p := self.alloc("SYSCALL", 0, Operands { })
    // SYSCALL
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x05)
    })
    return p
}
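
// Note (editorial): SYSCALL has no operands, so it always encodes to the
// fixed two-byte sequence 0f 05; callers typically precede p.SYSCALL()
// with moves (generated elsewhere in this file) that load the syscall
// number into RAX and the arguments into RDI/RSI/RDX per the x86-64 ABI.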

// T1MSKC performs "Inverse Mask From Trailing Ones".
//
// Mnemonic : T1MSKC
// Supported forms : (4 forms)
//
// * T1MSKC r32, r32 [TBM]
// * T1MSKC m32, r32 [TBM]
// * T1MSKC r64, r64 [TBM]
// * T1MSKC m64, r64 [TBM]
//
func (self *Program) T1MSKC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("T1MSKC", 2, Operands { v0, v1 })
    // T1MSKC r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xf8 | lcode(v[0]))
        })
    }
    // T1MSKC m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    // T1MSKC r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xf8 | lcode(v[0]))
        })
    }
    // T1MSKC m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(7, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for T1MSKC")
    }
    return p
}

// TESTB performs "Logical Compare".
//
// Mnemonic : TEST
// Supported forms : (5 forms)
//
// * TESTB imm8, al
// * TESTB imm8, r8
// * TESTB r8, r8
// * TESTB imm8, m8
// * TESTB r8, m8
//
func (self *Program) TESTB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TESTB", 2, Operands { v0, v1 })
    // TESTB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xa8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // TESTB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // TESTB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x84)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // TESTB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xf6)
            m.mrsd(0, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // TESTB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x84)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TESTB")
    }
    return p
}

// TESTL performs "Logical Compare".
//
// Mnemonic : TEST
// Supported forms : (5 forms)
//
// * TESTL imm32, eax
// * TESTL imm32, r32
// * TESTL r32, r32
// * TESTL imm32, m32
// * TESTL r32, m32
//
func (self *Program) TESTL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TESTL", 2, Operands { v0, v1 })
    // TESTL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xa9)
            m.imm4(toImmAny(v[0]))
        })
    }
    // TESTL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // TESTL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x85)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // TESTL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0xf7)
            m.mrsd(0, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // TESTL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x85)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TESTL")
    }
    return p
}

// TESTQ performs "Logical Compare".
//
// Mnemonic : TEST
// Supported forms : (5 forms)
//
// * TESTQ imm32, rax
// * TESTQ imm32, r64
// * TESTQ r64, r64
// * TESTQ imm32, m64
// * TESTQ r64, m64
//
func (self *Program) TESTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TESTQ", 2, Operands { v0, v1 })
    // TESTQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0xa9)
            m.imm4(toImmAny(v[0]))
        })
    }
    // TESTQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // TESTQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x85)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // TESTQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0xf7)
            m.mrsd(0, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // TESTQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x85)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TESTQ")
    }
    return p
}
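
// Usage sketch (editorial): TEST performs a non-destructive AND and only
// updates EFLAGS, so p.TESTQ(RAX, RAX) is the usual zero-check idiom; via
// the r64, r64 form above it encodes to 48 85 c0 (REX.W, opcode 0x85,
// ModRM 0xc0).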

// TESTW performs "Logical Compare".
//
// Mnemonic : TEST
// Supported forms : (5 forms)
//
// * TESTW imm16, ax
// * TESTW imm16, r16
// * TESTW r16, r16
// * TESTW imm16, m16
// * TESTW r16, m16
//
func (self *Program) TESTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TESTW", 2, Operands { v0, v1 })
    // TESTW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xa9)
            m.imm2(toImmAny(v[0]))
        })
    }
    // TESTW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // TESTW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x85)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // TESTW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0xf7)
            m.mrsd(0, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // TESTW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x85)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TESTW")
    }
    return p
}

// TZCNTL performs "Count the Number of Trailing Zero Bits".
//
// Mnemonic : TZCNT
// Supported forms : (2 forms)
//
// * TZCNTL r32, r32 [BMI]
// * TZCNTL m32, r32 [BMI]
//
func (self *Program) TZCNTL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TZCNTL", 2, Operands { v0, v1 })
    // TZCNTL r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // TZCNTL m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TZCNTL")
    }
    return p
}

// TZCNTQ performs "Count the Number of Trailing Zero Bits".
//
// Mnemonic : TZCNT
// Supported forms : (2 forms)
//
// * TZCNTQ r64, r64 [BMI]
// * TZCNTQ m64, r64 [BMI]
//
func (self *Program) TZCNTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TZCNTQ", 2, Operands { v0, v1 })
    // TZCNTQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x0f)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // TZCNTQ m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xf3)
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x0f)
            m.emit(0xbc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TZCNTQ")
    }
    return p
}

// TZCNTW performs "Count the Number of Trailing Zero Bits".
//
// Mnemonic : TZCNT
// Supported forms : (2 forms)
//
// * TZCNTW r16, r16 [BMI]
// * TZCNTW m16, r16 [BMI]
//
func (self *Program) TZCNTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TZCNTW", 2, Operands { v0, v1 })
    // TZCNTW r16, r16
    if isReg16(v0) && isReg16(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf3)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // TZCNTW m16, r16
    if isM16(v0) && isReg16(v1) {
        self.require(ISA_BMI)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0xf3)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0xbc)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TZCNTW")
    }
    return p
}

// TZMSK performs "Mask From Trailing Zeros".
//
// Mnemonic : TZMSK
// Supported forms : (4 forms)
//
// * TZMSK r32, r32 [TBM]
// * TZMSK m32, r32 [TBM]
// * TZMSK r64, r64 [TBM]
// * TZMSK m64, r64 [TBM]
//
func (self *Program) TZMSK(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("TZMSK", 2, Operands { v0, v1 })
    // TZMSK r32, r32
    if isReg32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0x78 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // TZMSK m32, r32
    if isM32(v0) && isReg32(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    // TZMSK r64, r64
    if isReg64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xe0 | lcode(v[0]))
        })
    }
    // TZMSK m64, r64
    if isM64(v0) && isReg64(v1) {
        self.require(ISA_TBM)
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(4, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for TZMSK")
    }
    return p
}
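
// Note (editorial): TZCNT and TZMSK both operate on trailing zeros, but
// TZCNT (the f3-prefixed 0f bc encoding) writes a count, e.g.
// p.TZCNTQ(RAX, RCX) leaves the trailing-zero count of rax in rcx, while
// the TBM-only TZMSK (XOP-encoded above) writes a mask covering those
// trailing zero bits instead.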

// UCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : UCOMISD
// Supported forms : (2 forms)
//
// * UCOMISD xmm, xmm [SSE2]
// * UCOMISD m64, xmm [SSE2]
//
func (self *Program) UCOMISD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("UCOMISD", 2, Operands { v0, v1 })
    // UCOMISD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // UCOMISD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for UCOMISD")
    }
    return p
}

// UCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : UCOMISS
// Supported forms : (2 forms)
//
// * UCOMISS xmm, xmm [SSE]
// * UCOMISS m32, xmm [SSE]
//
func (self *Program) UCOMISS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("UCOMISS", 2, Operands { v0, v1 })
    // UCOMISS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // UCOMISS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x2e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for UCOMISS")
    }
    return p
}
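
// Note (editorial): the only encoding difference between UCOMISD and
// UCOMISS is the mandatory 0x66 prefix on the double-precision form; both
// set ZF/PF/CF from the unordered compare, so callers normally follow
// them with an unsigned-style conditional branch (JA/JAE and friends,
// generated elsewhere in this file).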

// UD2 performs "Undefined Instruction".
//
// Mnemonic : UD2
// Supported forms : (1 form)
//
// * UD2
//
func (self *Program) UD2() *Instruction {
    p := self.alloc("UD2", 0, Operands { })
    // UD2
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x0b)
    })
    return p
}

// UNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values".
//
// Mnemonic : UNPCKHPD
// Supported forms : (2 forms)
//
// * UNPCKHPD xmm, xmm [SSE2]
// * UNPCKHPD m128, xmm [SSE2]
//
func (self *Program) UNPCKHPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("UNPCKHPD", 2, Operands { v0, v1 })
    // UNPCKHPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // UNPCKHPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x15)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for UNPCKHPD")
    }
    return p
}

// UNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values".
//
// Mnemonic : UNPCKHPS
// Supported forms : (2 forms)
//
// * UNPCKHPS xmm, xmm [SSE]
// * UNPCKHPS m128, xmm [SSE]
//
func (self *Program) UNPCKHPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("UNPCKHPS", 2, Operands { v0, v1 })
    // UNPCKHPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // UNPCKHPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x15)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for UNPCKHPS")
    }
    return p
}

// UNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values".
//
// Mnemonic : UNPCKLPD
// Supported forms : (2 forms)
//
// * UNPCKLPD xmm, xmm [SSE2]
// * UNPCKLPD m128, xmm [SSE2]
//
func (self *Program) UNPCKLPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("UNPCKLPD", 2, Operands { v0, v1 })
    // UNPCKLPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // UNPCKLPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x14)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for UNPCKLPD")
    }
    return p
}

// UNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values".
//
// Mnemonic : UNPCKLPS
// Supported forms : (2 forms)
//
// * UNPCKLPS xmm, xmm [SSE]
// * UNPCKLPS m128, xmm [SSE]
//
func (self *Program) UNPCKLPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("UNPCKLPS", 2, Operands { v0, v1 })
    // UNPCKLPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // UNPCKLPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x14)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for UNPCKLPS")
    }
    return p
}
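
// Note (editorial): the four UNPCK* encoders above differ only in opcode
// (0x15 interleaves the high halves, 0x14 the low halves) and in the 0x66
// prefix that selects the double-precision variant; the operand dispatch
// is otherwise identical.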

// VADDPD performs "Add Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VADDPD
// Supported forms : (11 forms)
//
// * VADDPD xmm, xmm, xmm [AVX]
// * VADDPD m128, xmm, xmm [AVX]
// * VADDPD ymm, ymm, ymm [AVX]
// * VADDPD m256, ymm, ymm [AVX]
// * VADDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VADDPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VADDPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VADDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VADDPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VADDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VADDPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VADDPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VADDPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VADDPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VADDPD takes 3 or 4 operands")
    }
    // VADDPD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDPD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VADDPD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VADDPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VADDPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VADDPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VADDPD")
    }
    return p
}
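
// Usage sketch (editorial): the variadic tail lets one method cover both
// operand counts, e.g. p.VADDPD(XMM3, XMM2, XMM1) selects the 3-operand
// VEX form, while a 4-operand call whose first argument is a rounding
// operand ({er}) selects the EVEX static-rounding form; the register
// names here are assumed constants from this package.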

// VADDPS performs "Add Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VADDPS
// Supported forms : (11 forms)
//
// * VADDPS xmm, xmm, xmm [AVX]
// * VADDPS m128, xmm, xmm [AVX]
// * VADDPS ymm, ymm, ymm [AVX]
// * VADDPS m256, ymm, ymm [AVX]
// * VADDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VADDPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VADDPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VADDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VADDPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VADDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VADDPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VADDPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VADDPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VADDPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VADDPS takes 3 or 4 operands")
    }
    // VADDPS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDPS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VADDPS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VADDPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VADDPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VADDPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VADDPS")
    }
    return p
}

// VADDSD performs "Add Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VADDSD
// Supported forms : (5 forms)
//
// * VADDSD xmm, xmm, xmm [AVX]
// * VADDSD m64, xmm, xmm [AVX]
// * VADDSD m64, xmm, xmm{k}{z} [AVX512F]
// * VADDSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VADDSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VADDSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VADDSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VADDSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VADDSD takes 3 or 4 operands")
    }
    // VADDSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VADDSD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VADDSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VADDSD")
    }
    return p
}

// VADDSS performs "Add Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VADDSS
// Supported forms : (5 forms)
//
// * VADDSS xmm, xmm, xmm [AVX]
// * VADDSS m32, xmm, xmm [AVX]
// * VADDSS m32, xmm, xmm{k}{z} [AVX512F]
// * VADDSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VADDSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VADDSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VADDSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VADDSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VADDSS takes 3 or 4 operands")
    }
    // VADDSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VADDSS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VADDSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VADDSS")
    }
    return p
}
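
// Note (editorial): in the EVEX memory forms of VADDSD/VADDSS above, the
// final argument to m.mrsd is the element size (8 or 4), which implements
// the AVX-512 disp8*N compressed-displacement scaling for scalar operands.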

// VADDSUBPD performs "Packed Double-FP Add/Subtract".
//
// Mnemonic : VADDSUBPD
// Supported forms : (4 forms)
//
// * VADDSUBPD xmm, xmm, xmm [AVX]
// * VADDSUBPD m128, xmm, xmm [AVX]
// * VADDSUBPD ymm, ymm, ymm [AVX]
// * VADDSUBPD m256, ymm, ymm [AVX]
//
func (self *Program) VADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VADDSUBPD", 3, Operands { v0, v1, v2 })
    // VADDSUBPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDSUBPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd0)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDSUBPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDSUBPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd0)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VADDSUBPD")
    }
    return p
}

// VADDSUBPS performs "Packed Single-FP Add/Subtract".
//
// Mnemonic : VADDSUBPS
// Supported forms : (4 forms)
//
// * VADDSUBPS xmm, xmm, xmm [AVX]
// * VADDSUBPS m128, xmm, xmm [AVX]
// * VADDSUBPS ymm, ymm, ymm [AVX]
// * VADDSUBPS m256, ymm, ymm [AVX]
//
func (self *Program) VADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VADDSUBPS", 3, Operands { v0, v1, v2 })
    // VADDSUBPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDSUBPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd0)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VADDSUBPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VADDSUBPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd0)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VADDSUBPS")
    }
    return p
}

// VAESDEC performs "Perform One Round of an AES Decryption Flow".
//
// Mnemonic : VAESDEC
// Supported forms : (2 forms)
//
// * VAESDEC xmm, xmm, xmm [AES,AVX]
// * VAESDEC m128, xmm, xmm [AES,AVX]
//
func (self *Program) VAESDEC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VAESDEC", 3, Operands { v0, v1, v2 })
    // VAESDEC xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VAESDEC m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xde)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VAESDEC")
    }
    return p
}

// VAESDECLAST performs "Perform Last Round of an AES Decryption Flow".
//
// Mnemonic : VAESDECLAST
// Supported forms : (2 forms)
//
// * VAESDECLAST xmm, xmm, xmm [AES,AVX]
// * VAESDECLAST m128, xmm, xmm [AES,AVX]
//
func (self *Program) VAESDECLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VAESDECLAST", 3, Operands { v0, v1, v2 })
    // VAESDECLAST xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VAESDECLAST m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VAESDECLAST")
    }
    return p
}

// VAESENC performs "Perform One Round of an AES Encryption Flow".
//
// Mnemonic : VAESENC
// Supported forms : (2 forms)
//
// * VAESENC xmm, xmm, xmm [AES,AVX]
// * VAESENC m128, xmm, xmm [AES,AVX]
//
func (self *Program) VAESENC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VAESENC", 3, Operands { v0, v1, v2 })
    // VAESENC xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VAESENC m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VAESENC")
    }
    return p
}

// VAESENCLAST performs "Perform Last Round of an AES Encryption Flow".
//
// Mnemonic : VAESENCLAST
// Supported forms : (2 forms)
//
// * VAESENCLAST xmm, xmm, xmm [AES,AVX]
// * VAESENCLAST m128, xmm, xmm [AES,AVX]
//
func (self *Program) VAESENCLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VAESENCLAST", 3, Operands { v0, v1, v2 })
    // VAESENCLAST xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VAESENCLAST m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VAESENCLAST")
    }
    return p
}
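
// Usage sketch (editorial): a caller encoding AES-128 would typically emit
// nine p.VAESENC(roundKey, state, state) rounds followed by a single
// p.VAESENCLAST(lastKey, state, state); roundKey/lastKey/state stand in
// for XMM register constants from this package.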

// VAESIMC performs "Perform the AES InvMixColumn Transformation".
//
// Mnemonic : VAESIMC
// Supported forms : (2 forms)
//
// * VAESIMC xmm, xmm [AES,AVX]
// * VAESIMC m128, xmm [AES,AVX]
//
func (self *Program) VAESIMC(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VAESIMC", 2, Operands { v0, v1 })
    // VAESIMC xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VAESIMC m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xdb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VAESIMC")
    }
    return p
}

// VAESKEYGENASSIST performs "AES Round Key Generation Assist".
//
// Mnemonic : VAESKEYGENASSIST
// Supported forms : (2 forms)
//
// * VAESKEYGENASSIST imm8, xmm, xmm [AES,AVX]
// * VAESKEYGENASSIST imm8, m128, xmm [AES,AVX]
//
func (self *Program) VAESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VAESKEYGENASSIST", 3, Operands { v0, v1, v2 })
    // VAESKEYGENASSIST imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VAESKEYGENASSIST imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX | ISA_AES)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VAESKEYGENASSIST")
    }
    return p
}

// VALIGND performs "Align Doubleword Vectors".
//
// Mnemonic : VALIGND
// Supported forms : (6 forms)
//
// * VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VALIGND imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VALIGND imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VALIGND imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VALIGND(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VALIGND", 4, Operands { v0, v1, v2, v3 })
    // VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGND imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGND imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGND imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VALIGND")
    }
    return p
}

// VALIGNQ performs "Align Quadword Vectors".
//
// Mnemonic : VALIGNQ
// Supported forms : (6 forms)
//
// * VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VALIGNQ imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VALIGNQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VALIGNQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VALIGNQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VALIGNQ", 4, Operands { v0, v1, v2, v3 })
    // VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGNQ imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGNQ imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VALIGNQ imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VALIGNQ")
    }
    return p
}
|
|
|
|
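// Usage sketch (illustrative only, not generated): assuming a *Program p
// from this package and its vector register constants (e.g. ZMM1), the
// register form would be called as
//
//     p.VALIGNQ(3, ZMM1, ZMM2, ZMM3)
//
// with the immediate first and the destination last, mirroring the operand
// order of the "Supported forms" list above.
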
// VANDNPD performs "Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VANDNPD
// Supported forms : (10 forms)
//
// * VANDNPD xmm, xmm, xmm [AVX]
// * VANDNPD m128, xmm, xmm [AVX]
// * VANDNPD ymm, ymm, ymm [AVX]
// * VANDNPD m256, ymm, ymm [AVX]
// * VANDNPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VANDNPD zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VANDNPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDNPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDNPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VANDNPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VANDNPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VANDNPD", 3, Operands { v0, v1, v2 })
    // VANDNPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDNPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDNPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VANDNPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VANDNPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VANDNPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VANDNPD")
    }
    return p
}

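// Usage sketch (illustrative only; the XMM0-style register constants are
// assumed to come from this package): the plain AVX form takes three
// registers, destination last, e.g.
//
//     p.VANDNPD(XMM0, XMM1, XMM2)
//
// which ANDs one source with the complement of the other into XMM2.
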
// VANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VANDNPS
// Supported forms : (10 forms)
//
// * VANDNPS xmm, xmm, xmm [AVX]
// * VANDNPS m128, xmm, xmm [AVX]
// * VANDNPS ymm, ymm, ymm [AVX]
// * VANDNPS m256, ymm, ymm [AVX]
// * VANDNPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VANDNPS zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VANDNPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDNPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDNPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VANDNPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VANDNPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VANDNPS", 3, Operands { v0, v1, v2 })
    // VANDNPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDNPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDNPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VANDNPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VANDNPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDNPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VANDNPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VANDNPS")
    }
    return p
}

// VANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VANDPD
// Supported forms : (10 forms)
//
// * VANDPD xmm, xmm, xmm [AVX]
// * VANDPD m128, xmm, xmm [AVX]
// * VANDPD ymm, ymm, ymm [AVX]
// * VANDPD m256, ymm, ymm [AVX]
// * VANDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VANDPD zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VANDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VANDPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VANDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VANDPD", 3, Operands { v0, v1, v2 })
    // VANDPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VANDPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VANDPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VANDPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VANDPD")
    }
    return p
}

// VANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VANDPS
// Supported forms : (10 forms)
//
// * VANDPS xmm, xmm, xmm [AVX]
// * VANDPS m128, xmm, xmm [AVX]
// * VANDPS ymm, ymm, ymm [AVX]
// * VANDPS m256, ymm, ymm [AVX]
// * VANDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VANDPS zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VANDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VANDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VANDPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VANDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VANDPS", 3, Operands { v0, v1, v2 })
    // VANDPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VANDPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VANDPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VANDPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VANDPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x54)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VANDPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VANDPS")
    }
    return p
}

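// NOTE: For instructions that exist in both VEX and EVEX encodings, the
// legacy AVX forms match on isXMM/isYMM while the AVX-512 forms match on
// isEVEXXMM/isEVEXYMM plus the {k}{z} predicates, so the operand types alone
// decide which encoding (and which ISA requirement) gets selected.
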
// VBLENDMPD performs "Blend Packed Double-Precision Floating-Point Vectors Using an OpMask Control".
//
// Mnemonic : VBLENDMPD
// Supported forms : (6 forms)
//
// * VBLENDMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VBLENDMPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VBLENDMPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VBLENDMPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VBLENDMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VBLENDMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VBLENDMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VBLENDMPD", 3, Operands { v0, v1, v2 })
    // VBLENDMPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VBLENDMPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VBLENDMPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VBLENDMPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VBLENDMPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VBLENDMPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBLENDMPD")
    }
    return p
}

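// Usage sketch (illustrative only; register constants assumed from this
// package): with no write mask the opmask-driven blend is a plain
// three-operand call, e.g.
//
//     p.VBLENDMPD(ZMM0, ZMM1, ZMM2)
//
// which encodes the "zmm, zmm, zmm{k}{z}" form above with masking disabled.
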
// VBLENDMPS performs "Blend Packed Single-Precision Floating-Point Vectors Using an OpMask Control".
//
// Mnemonic : VBLENDMPS
// Supported forms : (6 forms)
//
// * VBLENDMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VBLENDMPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VBLENDMPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VBLENDMPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VBLENDMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VBLENDMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VBLENDMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VBLENDMPS", 3, Operands { v0, v1, v2 })
    // VBLENDMPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VBLENDMPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VBLENDMPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VBLENDMPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VBLENDMPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VBLENDMPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBLENDMPS")
    }
    return p
}

// VBLENDPD performs "Blend Packed Double Precision Floating-Point Values".
//
// Mnemonic : VBLENDPD
// Supported forms : (4 forms)
//
// * VBLENDPD imm8, xmm, xmm, xmm [AVX]
// * VBLENDPD imm8, m128, xmm, xmm [AVX]
// * VBLENDPD imm8, ymm, ymm, ymm [AVX]
// * VBLENDPD imm8, m256, ymm, ymm [AVX]
//
func (self *Program) VBLENDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VBLENDPD", 4, Operands { v0, v1, v2, v3 })
    // VBLENDPD imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VBLENDPD imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0d)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VBLENDPD imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VBLENDPD imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0d)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBLENDPD")
    }
    return p
}

// VBLENDPS performs "Blend Packed Single Precision Floating-Point Values".
//
// Mnemonic : VBLENDPS
// Supported forms : (4 forms)
//
// * VBLENDPS imm8, xmm, xmm, xmm [AVX]
// * VBLENDPS imm8, m128, xmm, xmm [AVX]
// * VBLENDPS imm8, ymm, ymm, ymm [AVX]
// * VBLENDPS imm8, m256, ymm, ymm [AVX]
//
func (self *Program) VBLENDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VBLENDPS", 4, Operands { v0, v1, v2, v3 })
    // VBLENDPS imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VBLENDPS imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VBLENDPS imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VBLENDPS imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBLENDPS")
    }
    return p
}

// VBLENDVPD performs "Variable Blend Packed Double Precision Floating-Point Values".
//
// Mnemonic : VBLENDVPD
// Supported forms : (4 forms)
//
// * VBLENDVPD xmm, xmm, xmm, xmm [AVX]
// * VBLENDVPD xmm, m128, xmm, xmm [AVX]
// * VBLENDVPD ymm, ymm, ymm, ymm [AVX]
// * VBLENDVPD ymm, m256, ymm, ymm [AVX]
//
func (self *Program) VBLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VBLENDVPD", 4, Operands { v0, v1, v2, v3 })
    // VBLENDVPD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VBLENDVPD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x4b)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VBLENDVPD ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x4b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VBLENDVPD ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x4b)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBLENDVPD")
    }
    return p
}

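// NOTE: In the VBLENDVPD/VBLENDVPS forms above, the fourth (selector)
// register never reaches ModRM: it is packed into the high four bits of a
// trailing immediate byte via m.emit(hlcode(v[0]) << 4), which is how the
// VEX encoding of the variable blends transports it.
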
// VBLENDVPS performs "Variable Blend Packed Single Precision Floating-Point Values".
//
// Mnemonic : VBLENDVPS
// Supported forms : (4 forms)
//
// * VBLENDVPS xmm, xmm, xmm, xmm [AVX]
// * VBLENDVPS xmm, m128, xmm, xmm [AVX]
// * VBLENDVPS ymm, ymm, ymm, ymm [AVX]
// * VBLENDVPS ymm, m256, ymm, ymm [AVX]
//
func (self *Program) VBLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VBLENDVPS", 4, Operands { v0, v1, v2, v3 })
    // VBLENDVPS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VBLENDVPS xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x4a)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VBLENDVPS ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x4a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VBLENDVPS ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x4a)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBLENDVPS")
    }
    return p
}

// VBROADCASTF128 performs "Broadcast 128 Bit of Floating-Point Data".
//
// Mnemonic : VBROADCASTF128
// Supported forms : (1 form)
//
// * VBROADCASTF128 m128, ymm [AVX]
//
func (self *Program) VBROADCASTF128(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTF128", 2, Operands { v0, v1 })
    // VBROADCASTF128 m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTF128")
    }
    return p
}

// VBROADCASTF32X2 performs "Broadcast Two Single-Precision Floating-Point Elements".
//
// Mnemonic : VBROADCASTF32X2
// Supported forms : (4 forms)
//
// * VBROADCASTF32X2 xmm, zmm{k}{z} [AVX512DQ]
// * VBROADCASTF32X2 m64, zmm{k}{z} [AVX512DQ]
// * VBROADCASTF32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VBROADCASTF32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VBROADCASTF32X2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTF32X2", 2, Operands { v0, v1 })
    // VBROADCASTF32X2 xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTF32X2 m64, zmm{k}{z}
    if isM64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VBROADCASTF32X2 xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTF32X2 m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTF32X2")
    }
    return p
}

// VBROADCASTF32X4 performs "Broadcast Four Single-Precision Floating-Point Elements".
//
// Mnemonic : VBROADCASTF32X4
// Supported forms : (2 forms)
//
// * VBROADCASTF32X4 m128, zmm{k}{z} [AVX512F]
// * VBROADCASTF32X4 m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VBROADCASTF32X4(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTF32X4", 2, Operands { v0, v1 })
    // VBROADCASTF32X4 m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VBROADCASTF32X4 m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTF32X4")
    }
    return p
}

// VBROADCASTF32X8 performs "Broadcast Eight Single-Precision Floating-Point Elements".
//
// Mnemonic : VBROADCASTF32X8
// Supported forms : (1 form)
//
// * VBROADCASTF32X8 m256, zmm{k}{z} [AVX512DQ]
//
func (self *Program) VBROADCASTF32X8(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTF32X8", 2, Operands { v0, v1 })
    // VBROADCASTF32X8 m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTF32X8")
    }
    return p
}

// VBROADCASTF64X2 performs "Broadcast Two Double-Precision Floating-Point Elements".
//
// Mnemonic : VBROADCASTF64X2
// Supported forms : (2 forms)
//
// * VBROADCASTF64X2 m128, zmm{k}{z} [AVX512DQ]
// * VBROADCASTF64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VBROADCASTF64X2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTF64X2", 2, Operands { v0, v1 })
    // VBROADCASTF64X2 m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VBROADCASTF64X2 m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTF64X2")
    }
    return p
}

// VBROADCASTF64X4 performs "Broadcast Four Double-Precision Floating-Point Elements".
//
// Mnemonic : VBROADCASTF64X4
// Supported forms : (1 form)
//
// * VBROADCASTF64X4 m256, zmm{k}{z} [AVX512F]
//
func (self *Program) VBROADCASTF64X4(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTF64X4", 2, Operands { v0, v1 })
    // VBROADCASTF64X4 m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTF64X4")
    }
    return p
}

// VBROADCASTI128 performs "Broadcast 128 Bits of Integer Data".
//
// Mnemonic : VBROADCASTI128
// Supported forms : (1 form)
//
// * VBROADCASTI128 m128, ymm [AVX2]
//
func (self *Program) VBROADCASTI128(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTI128", 2, Operands { v0, v1 })
    // VBROADCASTI128 m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTI128")
    }
    return p
}

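// NOTE: The last argument to m.mrsd() is the memory-operand scale: 1 for the
// VEX-encoded forms, and the element or vector size (4, 8, 16, 32, 64) for
// the EVEX-encoded forms, where it feeds the AVX-512 compressed disp8*N
// displacement encoding.
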
// VBROADCASTI32X2 performs "Broadcast Two Doubleword Elements".
//
// Mnemonic : VBROADCASTI32X2
// Supported forms : (6 forms)
//
// * VBROADCASTI32X2 xmm, zmm{k}{z} [AVX512DQ]
// * VBROADCASTI32X2 m64, zmm{k}{z} [AVX512DQ]
// * VBROADCASTI32X2 xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VBROADCASTI32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VBROADCASTI32X2 m64, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VBROADCASTI32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VBROADCASTI32X2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTI32X2", 2, Operands { v0, v1 })
    // VBROADCASTI32X2 xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTI32X2 m64, zmm{k}{z}
    if isM64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VBROADCASTI32X2 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTI32X2 xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTI32X2 m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VBROADCASTI32X2 m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTI32X2")
    }
    return p
}

// VBROADCASTI32X4 performs "Broadcast Four Doubleword Elements".
//
// Mnemonic : VBROADCASTI32X4
// Supported forms : (2 forms)
//
// * VBROADCASTI32X4 m128, zmm{k}{z} [AVX512F]
// * VBROADCASTI32X4 m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VBROADCASTI32X4(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTI32X4", 2, Operands { v0, v1 })
    // VBROADCASTI32X4 m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VBROADCASTI32X4 m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTI32X4")
    }
    return p
}

// VBROADCASTI32X8 performs "Broadcast Eight Doubleword Elements".
//
// Mnemonic : VBROADCASTI32X8
// Supported forms : (1 form)
//
// * VBROADCASTI32X8 m256, zmm{k}{z} [AVX512DQ]
//
func (self *Program) VBROADCASTI32X8(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTI32X8", 2, Operands { v0, v1 })
    // VBROADCASTI32X8 m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTI32X8")
    }
    return p
}

// VBROADCASTI64X2 performs "Broadcast Two Quadword Elements".
//
// Mnemonic : VBROADCASTI64X2
// Supported forms : (2 forms)
//
// * VBROADCASTI64X2 m128, zmm{k}{z} [AVX512DQ]
// * VBROADCASTI64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VBROADCASTI64X2(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTI64X2", 2, Operands { v0, v1 })
    // VBROADCASTI64X2 m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VBROADCASTI64X2 m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTI64X2")
    }
    return p
}

// VBROADCASTI64X4 performs "Broadcast Four Quadword Elements".
//
// Mnemonic : VBROADCASTI64X4
// Supported forms : (1 form)
//
// * VBROADCASTI64X4 m256, zmm{k}{z} [AVX512F]
//
func (self *Program) VBROADCASTI64X4(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTI64X4", 2, Operands { v0, v1 })
    // VBROADCASTI64X4 m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTI64X4")
    }
    return p
}

// VBROADCASTSD performs "Broadcast Double-Precision Floating-Point Element".
//
// Mnemonic : VBROADCASTSD
// Supported forms : (6 forms)
//
// * VBROADCASTSD m64, ymm [AVX]
// * VBROADCASTSD xmm, ymm [AVX2]
// * VBROADCASTSD xmm, zmm{k}{z} [AVX512F]
// * VBROADCASTSD m64, zmm{k}{z} [AVX512F]
// * VBROADCASTSD xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VBROADCASTSD m64, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VBROADCASTSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTSD", 2, Operands { v0, v1 })
    // VBROADCASTSD m64, ymm
    if isM64(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VBROADCASTSD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSD xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSD m64, zmm{k}{z}
    if isM64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VBROADCASTSD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSD m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTSD")
    }
    return p
}

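// Usage sketch (illustrative only; register constants assumed from this
// package): the AVX2 register form broadcasts the low double of an XMM
// source into every lane of a YMM destination:
//
//     p.VBROADCASTSD(XMM0, YMM1)
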
// VBROADCASTSS performs "Broadcast Single-Precision Floating-Point Element".
|
|
//
|
|
// Mnemonic : VBROADCASTSS
|
|
// Supported forms : (8 forms)
|
|
//
|
|
// * VBROADCASTSS m32, xmm [AVX]
|
|
// * VBROADCASTSS m32, ymm [AVX]
|
|
// * VBROADCASTSS xmm, xmm [AVX2]
|
|
// * VBROADCASTSS xmm, ymm [AVX2]
|
|
// * VBROADCASTSS xmm, zmm{k}{z} [AVX512F]
|
|
// * VBROADCASTSS m32, zmm{k}{z} [AVX512F]
|
|
// * VBROADCASTSS xmm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VBROADCASTSS m32, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VBROADCASTSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VBROADCASTSS", 2, Operands { v0, v1 })
    // VBROADCASTSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VBROADCASTSS m32, ymm
    if isM32(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VBROADCASTSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSS xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSS xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSS m32, zmm{k}{z}
    if isM32(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VBROADCASTSS xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VBROADCASTSS m32, ymm{k}{z}
    if isM32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VBROADCASTSS")
    }
    return p
}

// VCMPPD performs "Compare Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VCMPPD
// Supported forms : (11 forms)
//
// * VCMPPD imm8, xmm, xmm, xmm [AVX]
// * VCMPPD imm8, m128, xmm, xmm [AVX]
// * VCMPPD imm8, ymm, ymm, ymm [AVX]
// * VCMPPD imm8, m256, ymm, ymm [AVX]
// * VCMPPD imm8, m512/m64bcst, zmm, k{k} [AVX512F]
// * VCMPPD imm8, {sae}, zmm, zmm, k{k} [AVX512F]
// * VCMPPD imm8, zmm, zmm, k{k} [AVX512F]
// * VCMPPD imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VCMPPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VCMPPD imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VCMPPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
//
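// A minimal usage sketch (illustrative; DefaultArch.CreateProgram and the
// register constants are assumed from the rest of the package). Note the
// operand order: the predicate immediate comes first and the destination last:
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCMPPD(0, XMM1, XMM2, XMM3)    // predicate 0 (EQ_OQ): per-lane compare of XMM2 with XMM1 into XMM3
//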
func (self *Program) VCMPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCMPPD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VCMPPD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VCMPPD takes 4 or 5 operands")
    }
    // VCMPPD imm8, xmm, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, m128, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, ymm, ymm, ymm
    if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, m256, ymm, ymm
    if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, m512/m64bcst, zmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, {sae}, zmm, zmm, k{k}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, zmm, zmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, m128/m64bcst, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, xmm, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, m256/m64bcst, ymm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPD imm8, ymm, ymm, k{k}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCMPPD")
    }
    return p
}

// VCMPPS performs "Compare Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VCMPPS
// Supported forms : (11 forms)
//
// * VCMPPS imm8, xmm, xmm, xmm [AVX]
// * VCMPPS imm8, m128, xmm, xmm [AVX]
// * VCMPPS imm8, ymm, ymm, ymm [AVX]
// * VCMPPS imm8, m256, ymm, ymm [AVX]
// * VCMPPS imm8, m512/m32bcst, zmm, k{k} [AVX512F]
// * VCMPPS imm8, {sae}, zmm, zmm, k{k} [AVX512F]
// * VCMPPS imm8, zmm, zmm, k{k} [AVX512F]
// * VCMPPS imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VCMPPS imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VCMPPS imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VCMPPS imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
//
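// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCMPPS(1, YMM1, YMM2, YMM3)    // predicate 1 (LT_OS): per-lane less-than mask into YMM3
//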
func (self *Program) VCMPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCMPPS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VCMPPS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VCMPPS takes 4 or 5 operands")
    }
    // VCMPPS imm8, xmm, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, m128, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, ymm, ymm, ymm
    if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, m256, ymm, ymm
    if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, m512/m32bcst, zmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, {sae}, zmm, zmm, k{k}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7c ^ (hlcode(v[3]) << 3))
            m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, zmm, zmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, m128/m32bcst, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, xmm, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, m256/m32bcst, ymm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPPS imm8, ymm, ymm, k{k}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCMPPS")
    }
    return p
}

// VCMPSD performs "Compare Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VCMPSD
// Supported forms : (5 forms)
//
// * VCMPSD imm8, xmm, xmm, xmm [AVX]
// * VCMPSD imm8, m64, xmm, xmm [AVX]
// * VCMPSD imm8, m64, xmm, k{k} [AVX512F]
// * VCMPSD imm8, {sae}, xmm, xmm, k{k} [AVX512F]
// * VCMPSD imm8, xmm, xmm, k{k} [AVX512F]
//
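// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCMPSD(2, XMM1, XMM2, XMM3)    // predicate 2 (LE_OS) on the low float64 lane
//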
func (self *Program) VCMPSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCMPSD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VCMPSD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VCMPSD takes 4 or 5 operands")
    }
    // VCMPSD imm8, xmm, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSD imm8, m64, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSD imm8, m64, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSD imm8, {sae}, xmm, xmm, k{k}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xff ^ (hlcode(v[3]) << 3))
            m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSD imm8, xmm, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCMPSD")
    }
    return p
}

// VCMPSS performs "Compare Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VCMPSS
// Supported forms : (5 forms)
//
// * VCMPSS imm8, xmm, xmm, xmm [AVX]
// * VCMPSS imm8, m32, xmm, xmm [AVX]
// * VCMPSS imm8, m32, xmm, k{k} [AVX512F]
// * VCMPSS imm8, {sae}, xmm, xmm, k{k} [AVX512F]
// * VCMPSS imm8, xmm, xmm, k{k} [AVX512F]
//
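// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCMPSS(0, XMM4, XMM5, XMM6)    // predicate 0 (EQ_OQ) on the low float32 lane
//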
func (self *Program) VCMPSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCMPSS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VCMPSS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VCMPSS takes 4 or 5 operands")
    }
    // VCMPSS imm8, xmm, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSS imm8, m32, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSS imm8, m32, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0xc2)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSS imm8, {sae}, xmm, xmm, k{k}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7e ^ (hlcode(v[3]) << 3))
            m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCMPSS imm8, xmm, xmm, k{k}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCMPSS")
    }
    return p
}

// VCOMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : VCOMISD
// Supported forms : (5 forms)
//
// * VCOMISD xmm, xmm [AVX]
// * VCOMISD m64, xmm [AVX]
// * VCOMISD m64, xmm [AVX512F]
// * VCOMISD {sae}, xmm, xmm [AVX512F]
// * VCOMISD xmm, xmm [AVX512F]
//
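// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCOMISD(XMM1, XMM0)            // ordered scalar compare; the result lands in ZF/PF/CF
//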
func (self *Program) VCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCOMISD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCOMISD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCOMISD takes 2 or 3 operands")
    }
    // VCOMISD xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCOMISD m64, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCOMISD m64, xmm
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2f)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCOMISD {sae}, xmm, xmm
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit(0x18)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCOMISD xmm, xmm
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit(0x48)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCOMISD")
    }
    return p
}

// VCOMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : VCOMISS
// Supported forms : (5 forms)
//
// * VCOMISS xmm, xmm [AVX]
// * VCOMISS m32, xmm [AVX]
// * VCOMISS m32, xmm [AVX512F]
// * VCOMISS {sae}, xmm, xmm [AVX512F]
// * VCOMISS xmm, xmm [AVX512F]
//
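// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCOMISS(XMM1, XMM0)            // ordered float32 compare; the result lands in ZF/PF/CF
//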
func (self *Program) VCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCOMISS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCOMISS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCOMISS takes 2 or 3 operands")
    }
    // VCOMISS xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCOMISS m32, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCOMISS m32, xmm
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2f)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCOMISS {sae}, xmm, xmm
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit(0x18)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCOMISS xmm, xmm
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit(0x48)
            m.emit(0x2f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCOMISS")
    }
    return p
}

// VCOMPRESSPD performs "Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory/Register".
//
// Mnemonic : VCOMPRESSPD
// Supported forms : (6 forms)
//
// * VCOMPRESSPD zmm, zmm{k}{z} [AVX512F]
// * VCOMPRESSPD zmm, m512{k}{z} [AVX512F]
// * VCOMPRESSPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCOMPRESSPD xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VCOMPRESSPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VCOMPRESSPD ymm, m256{k}{z} [AVX512F,AVX512VL]
//
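// A minimal usage sketch (illustrative; assumed constructor and register
// constants, and it assumes the optional {k}{z} decoration may be omitted
// from the destination operand):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCOMPRESSPD(ZMM1, ZMM2)        // compress active float64 lanes of ZMM1 into ZMM2
//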
func (self *Program) VCOMPRESSPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VCOMPRESSPD", 2, Operands { v0, v1 })
    // VCOMPRESSPD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VCOMPRESSPD zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8a)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VCOMPRESSPD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VCOMPRESSPD xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8a)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VCOMPRESSPD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VCOMPRESSPD ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8a)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCOMPRESSPD")
    }
    return p
}

// VCOMPRESSPS performs "Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory/Register".
//
// Mnemonic : VCOMPRESSPS
// Supported forms : (6 forms)
//
// * VCOMPRESSPS zmm, zmm{k}{z} [AVX512F]
// * VCOMPRESSPS zmm, m512{k}{z} [AVX512F]
// * VCOMPRESSPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCOMPRESSPS xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VCOMPRESSPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VCOMPRESSPS ymm, m256{k}{z} [AVX512F,AVX512VL]
//
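// A minimal usage sketch (illustrative; assumed constructor and register
// constants, with the optional {k}{z} decoration omitted):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCOMPRESSPS(ZMM3, ZMM4)        // compress active float32 lanes of ZMM3 into ZMM4
//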
func (self *Program) VCOMPRESSPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VCOMPRESSPS", 2, Operands { v0, v1 })
    // VCOMPRESSPS zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VCOMPRESSPS zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8a)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VCOMPRESSPS xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VCOMPRESSPS xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8a)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VCOMPRESSPS ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x8a)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VCOMPRESSPS ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8a)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCOMPRESSPS")
    }
    return p
}

// VCVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values".
//
// Mnemonic : VCVTDQ2PD
// Supported forms : (10 forms)
//
// * VCVTDQ2PD xmm, xmm [AVX]
// * VCVTDQ2PD m64, xmm [AVX]
// * VCVTDQ2PD xmm, ymm [AVX]
// * VCVTDQ2PD m128, ymm [AVX]
// * VCVTDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTDQ2PD ymm, zmm{k}{z} [AVX512F]
// * VCVTDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL]
//
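// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCVTDQ2PD(XMM0, YMM1)          // widen four packed int32 to four float64
//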
func (self *Program) VCVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VCVTDQ2PD", 2, Operands { v0, v1 })
    // VCVTDQ2PD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTDQ2PD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), v[0], 0)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PD m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTDQ2PD m256/m32bcst, zmm{k}{z}
    if isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTDQ2PD ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PD m64/m32bcst, xmm{k}{z}
    if isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTDQ2PD m128/m32bcst, ymm{k}{z}
    if isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTDQ2PD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTDQ2PD")
    }
    return p
}

// VCVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values".
//
// Mnemonic : VCVTDQ2PS
// Supported forms : (11 forms)
//
// * VCVTDQ2PS xmm, xmm [AVX]
// * VCVTDQ2PS m128, xmm [AVX]
// * VCVTDQ2PS ymm, ymm [AVX]
// * VCVTDQ2PS m256, ymm [AVX]
// * VCVTDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTDQ2PS {er}, zmm, zmm{k}{z} [AVX512F]
// * VCVTDQ2PS zmm, zmm{k}{z} [AVX512F]
// * VCVTDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
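// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCVTDQ2PS(YMM0, YMM1)          // convert eight packed int32 to eight float32
//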
func (self *Program) VCVTDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTDQ2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTDQ2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTDQ2PS takes 2 or 3 operands")
    }
    // VCVTDQ2PS xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PS m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTDQ2PS ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PS m256, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTDQ2PS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTDQ2PS {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTDQ2PS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PS m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTDQ2PS m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTDQ2PS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTDQ2PS ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTDQ2PS")
    }
    return p
}

// VCVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : VCVTPD2DQ
// Supported forms : (11 forms)
//
// * VCVTPD2DQ xmm, xmm [AVX]
// * VCVTPD2DQ ymm, xmm [AVX]
// * VCVTPD2DQ m128, xmm [AVX]
// * VCVTPD2DQ m256, xmm [AVX]
// * VCVTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F]
// * VCVTPD2DQ {er}, zmm, ymm{k}{z} [AVX512F]
// * VCVTPD2DQ zmm, ymm{k}{z} [AVX512F]
// * VCVTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
//
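// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCVTPD2DQ(YMM2, XMM0)          // narrow four float64 to four int32 (note the smaller destination)
//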
func (self *Program) VCVTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPD2DQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPD2DQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPD2DQ takes 2 or 3 operands")
    }
    // VCVTPD2DQ xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), v[0], 0)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2DQ ymm, xmm
    if len(vv) == 0 && isYMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[1]), v[0], 0)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2DQ m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPD2DQ m256, xmm
    if len(vv) == 0 && isM256(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPD2DQ m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPD2DQ {er}, zmm, ymm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPD2DQ zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2DQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPD2DQ m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPD2DQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2DQ ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPD2DQ")
    }
    return p
}

// VCVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values".
//
// Mnemonic : VCVTPD2PS
// Supported forms : (11 forms)
//
// * VCVTPD2PS xmm, xmm [AVX]
// * VCVTPD2PS ymm, xmm [AVX]
// * VCVTPD2PS m128, xmm [AVX]
// * VCVTPD2PS m256, xmm [AVX]
// * VCVTPD2PS m512/m64bcst, ymm{k}{z} [AVX512F]
// * VCVTPD2PS {er}, zmm, ymm{k}{z} [AVX512F]
// * VCVTPD2PS zmm, ymm{k}{z} [AVX512F]
// * VCVTPD2PS m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2PS m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2PS ymm, xmm{k}{z} [AVX512F,AVX512VL]
//
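// A minimal usage sketch (illustrative; assumed constructor and register
// constants from the rest of the package):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCVTPD2PS(YMM1, XMM0)          // narrow four float64 to four float32
//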
func (self *Program) VCVTPD2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPD2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPD2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPD2PS takes 2 or 3 operands")
    }
    // VCVTPD2PS xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2PS ymm, xmm
    if len(vv) == 0 && isYMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2PS m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPD2PS m256, xmm
    if len(vv) == 0 && isM256(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPD2PS m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPD2PS {er}, zmm, ymm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPD2PS zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2PS m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPD2PS m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPD2PS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2PS ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPD2PS")
    }
    return p
}

// VCVTPD2QQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers".
//
// Mnemonic : VCVTPD2QQ
// Supported forms : (7 forms)
//
// * VCVTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VCVTPD2QQ {er}, zmm, zmm{k}{z} [AVX512DQ]
// * VCVTPD2QQ zmm, zmm{k}{z} [AVX512DQ]
// * VCVTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
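// A minimal usage sketch (illustrative; assumed constructor and register
// constants, with the optional {k}{z} decoration omitted):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCVTPD2QQ(ZMM1, ZMM2)          // eight float64 to eight int64 (requires AVX512DQ)
//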
func (self *Program) VCVTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPD2QQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPD2QQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPD2QQ takes 2 or 3 operands")
    }
    // VCVTPD2QQ m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7b)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPD2QQ {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPD2QQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2QQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7b)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPD2QQ m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPD2QQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2QQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPD2QQ")
    }
    return p
}

// VCVTPD2UDQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers".
//
// Mnemonic : VCVTPD2UDQ
// Supported forms : (7 forms)
//
// * VCVTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F]
// * VCVTPD2UDQ {er}, zmm, ymm{k}{z} [AVX512F]
// * VCVTPD2UDQ zmm, ymm{k}{z} [AVX512F]
// * VCVTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
//
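// A minimal usage sketch (illustrative; assumed constructor and register
// constants, with the optional {k}{z} decoration omitted):
//
//	p := DefaultArch.CreateProgram() // assumed constructor
//	p.VCVTPD2UDQ(ZMM1, YMM0)         // eight float64 to eight uint32 in a ymm destination
//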
func (self *Program) VCVTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPD2UDQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPD2UDQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPD2UDQ takes 2 or 3 operands")
    }
    // VCVTPD2UDQ m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPD2UDQ {er}, zmm, ymm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPD2UDQ zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2UDQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPD2UDQ m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPD2UDQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2UDQ ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPD2UDQ")
    }
    return p
}
|
|
|
|
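// Usage sketch (illustrative only, not generated output): assuming this
// package's CreateArch/CreateProgram entry points and its exported register
// values, a caller selects one of the forms above purely by argument types,
// e.g.:
//
//     p := CreateArch().CreateProgram()
//     p.VCVTPD2UDQ(ZMM0, YMM1)     // zmm source, ymm{k}{z} destination form
//
// Whether rounding-control values such as RN_SAE are exposed for the {er}
// form depends on the package's operand definitions; that name is assumed
// here, not taken from this file.
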
// VCVTPD2UQQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers".
//
// Mnemonic : VCVTPD2UQQ
// Supported forms : (7 forms)
//
// * VCVTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VCVTPD2UQQ {er}, zmm, zmm{k}{z} [AVX512DQ]
// * VCVTPD2UQQ zmm, zmm{k}{z} [AVX512DQ]
// * VCVTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPD2UQQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPD2UQQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPD2UQQ takes 2 or 3 operands")
    }
    // VCVTPD2UQQ m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPD2UQQ {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPD2UQQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2UQQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPD2UQQ m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPD2UQQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPD2UQQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPD2UQQ")
    }
    return p
}

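// The literal last argument of m.mrsd in the EVEX memory forms above (16, 32
// or 64, versus 1 in VEX-encoded instructions) is the disp8*N compression
// factor of EVEX memory operands: one-byte displacements are scaled by the
// memory access width, which is how AVX-512 keeps short encodings for
// vector-sized strides.
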
// VCVTPH2PS performs "Convert Half-Precision FP Values to Single-Precision FP Values".
//
// Mnemonic : VCVTPH2PS
// Supported forms : (11 forms)
//
// * VCVTPH2PS xmm, xmm [F16C]
// * VCVTPH2PS m64, xmm [F16C]
// * VCVTPH2PS xmm, ymm [F16C]
// * VCVTPH2PS m128, ymm [F16C]
// * VCVTPH2PS m256, zmm{k}{z} [AVX512F]
// * VCVTPH2PS {sae}, ymm, zmm{k}{z} [AVX512F]
// * VCVTPH2PS ymm, zmm{k}{z} [AVX512F]
// * VCVTPH2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPH2PS xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTPH2PS m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPH2PS m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTPH2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPH2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPH2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPH2PS takes 2 or 3 operands")
    }
    // VCVTPH2PS xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPH2PS m64, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPH2PS xmm, ymm
    if len(vv) == 0 && isXMM(v0) && isYMM(v1) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPH2PS m128, ymm
    if len(vv) == 0 && isM128(v0) && isYMM(v1) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPH2PS m256, zmm{k}{z}
    if len(vv) == 0 && isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPH2PS {sae}, ymm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPH2PS ymm, zmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPH2PS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPH2PS xmm, ymm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPH2PS m64, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTPH2PS m128, ymm{k}{z}
    if len(vv) == 0 && isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPH2PS")
    }
    return p
}

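// Form selection in VCVTPH2PS is purely type-driven: plain XMM/YMM operands
// match the VEX-encoded F16C forms, while EVEX-only registers
// (isEVEXXMM/isEVEXYMM) and masked destinations route to the AVX-512
// encodings, so the same conversion assembles differently depending on the
// register kind the caller passes.
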
// VCVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : VCVTPS2DQ
// Supported forms : (11 forms)
//
// * VCVTPS2DQ xmm, xmm [AVX]
// * VCVTPS2DQ m128, xmm [AVX]
// * VCVTPS2DQ ymm, ymm [AVX]
// * VCVTPS2DQ m256, ymm [AVX]
// * VCVTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTPS2DQ {er}, zmm, zmm{k}{z} [AVX512F]
// * VCVTPS2DQ zmm, zmm{k}{z} [AVX512F]
// * VCVTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPS2DQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPS2DQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPS2DQ takes 2 or 3 operands")
    }
    // VCVTPS2DQ xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2DQ m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPS2DQ ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2DQ m256, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPS2DQ m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPS2DQ {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPS2DQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2DQ m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPS2DQ m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPS2DQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2DQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPS2DQ")
    }
    return p
}

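// In the {er} form above, vcode(v[0]) << 5 writes the embedded rounding
// control into the EVEX L'L bits while the 0x18 constant sets the b bit:
// this is how AVX-512 encodes per-instruction rounding on register-register
// conversions, and it is also why {er} forms are only defined at 512-bit
// width (the rounding field reuses the vector-length bits).
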
// VCVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values".
//
// Mnemonic : VCVTPS2PD
// Supported forms : (11 forms)
//
// * VCVTPS2PD xmm, xmm [AVX]
// * VCVTPS2PD m64, xmm [AVX]
// * VCVTPS2PD xmm, ymm [AVX]
// * VCVTPS2PD m128, ymm [AVX]
// * VCVTPS2PD m256/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTPS2PD {sae}, ymm, zmm{k}{z} [AVX512F]
// * VCVTPS2PD ymm, zmm{k}{z} [AVX512F]
// * VCVTPS2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2PD m128/m32bcst, ymm{k}{z} [AVX512VL]
// * VCVTPS2PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2PD xmm, ymm{k}{z} [AVX512VL]
//
func (self *Program) VCVTPS2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPS2PD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPS2PD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPS2PD takes 2 or 3 operands")
    }
    // VCVTPS2PD xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2PD m64, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPS2PD xmm, ymm
    if len(vv) == 0 && isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2PD m128, ymm
    if len(vv) == 0 && isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTPS2PD m256/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPS2PD {sae}, ymm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPS2PD ymm, zmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2PD m64/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTPS2PD m128/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPS2PD xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2PD xmm, ymm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPS2PD")
    }
    return p
}

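// The {sae} form of VCVTPS2PD sets the same EVEX.b bit (the 0x18 constant)
// but contributes no rounding bits: widening single to double is exact, so
// for this instruction b merely suppresses all floating-point exceptions,
// which is exactly what the isSAE operand requests.
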
// VCVTPS2PH performs "Convert Single-Precision FP value to Half-Precision FP value".
//
// Mnemonic : VCVTPS2PH
// Supported forms : (11 forms)
//
// * VCVTPS2PH imm8, xmm, xmm [F16C]
// * VCVTPS2PH imm8, ymm, xmm [F16C]
// * VCVTPS2PH imm8, xmm, m64 [F16C]
// * VCVTPS2PH imm8, ymm, m128 [F16C]
// * VCVTPS2PH imm8, zmm, m256{k}{z} [AVX512F]
// * VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z} [AVX512F]
// * VCVTPS2PH imm8, zmm, ymm{k}{z} [AVX512F]
// * VCVTPS2PH imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2PH imm8, xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2PH imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2PH imm8, ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTPS2PH(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPS2PH", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTPS2PH", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTPS2PH takes 3 or 4 operands")
    }
    // VCVTPS2PH imm8, xmm, xmm
    if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, ymm, xmm
    if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isXMM(v2) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x7d)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, xmm, m64
    if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isM64(v2) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, ymm, m128
    if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isM128(v2) {
        self.require(ISA_F16C)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, zmm, m256{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isM256kz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[2]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[3]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[3]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, zmm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, xmm, m64{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isM64kz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[2]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, ymm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VCVTPS2PH imm8, ymm, m128{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPS2PH")
    }
    return p
}

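// Usage sketch for the immediate operand (illustrative; the register names
// are assumed to be this package's exported values): per the SDM, imm8 bit 2
// selects MXCSR rounding, otherwise the low two bits pick the mode, with 0
// meaning round-to-nearest-even, so
//
//     p.VCVTPS2PH(0, YMM2, XMM3)   // pack eight floats into xmm3 as halves
//
// emits the F16C or AVX-512 form depending on the register kinds passed.
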
// VCVTPS2QQ performs "Convert Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values".
//
// Mnemonic : VCVTPS2QQ
// Supported forms : (7 forms)
//
// * VCVTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
// * VCVTPS2QQ {er}, ymm, zmm{k}{z} [AVX512DQ]
// * VCVTPS2QQ ymm, zmm{k}{z} [AVX512DQ]
// * VCVTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPS2QQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPS2QQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPS2QQ takes 2 or 3 operands")
    }
    // VCVTPS2QQ m256/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPS2QQ {er}, ymm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPS2QQ ymm, zmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2QQ m64/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7b)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTPS2QQ m128/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7b)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPS2QQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2QQ xmm, ymm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPS2QQ")
    }
    return p
}

// VCVTPS2UDQ performs "Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values".
//
// Mnemonic : VCVTPS2UDQ
// Supported forms : (7 forms)
//
// * VCVTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTPS2UDQ {er}, zmm, zmm{k}{z} [AVX512F]
// * VCVTPS2UDQ zmm, zmm{k}{z} [AVX512F]
// * VCVTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPS2UDQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPS2UDQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPS2UDQ takes 2 or 3 operands")
    }
    // VCVTPS2UDQ m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTPS2UDQ {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPS2UDQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2UDQ m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPS2UDQ m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPS2UDQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2UDQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPS2UDQ")
    }
    return p
}

// VCVTPS2UQQ performs "Convert Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values".
//
// Mnemonic : VCVTPS2UQQ
// Supported forms : (7 forms)
//
// * VCVTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
// * VCVTPS2UQQ {er}, ymm, zmm{k}{z} [AVX512DQ]
// * VCVTPS2UQQ ymm, zmm{k}{z} [AVX512DQ]
// * VCVTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTPS2UQQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTPS2UQQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTPS2UQQ takes 2 or 3 operands")
    }
    // VCVTPS2UQQ m256/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTPS2UQQ {er}, ymm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTPS2UQQ ymm, zmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2UQQ m64/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTPS2UQQ m128/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTPS2UQQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTPS2UQQ xmm, ymm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTPS2UQQ")
    }
    return p
}

// VCVTQQ2PD performs "Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VCVTQQ2PD
// Supported forms : (7 forms)
//
// * VCVTQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VCVTQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ]
// * VCVTQQ2PD zmm, zmm{k}{z} [AVX512DQ]
// * VCVTQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTQQ2PD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTQQ2PD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTQQ2PD takes 2 or 3 operands")
    }
    // VCVTQQ2PD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTQQ2PD {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTQQ2PD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTQQ2PD m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTQQ2PD m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTQQ2PD xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTQQ2PD ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTQQ2PD")
    }
    return p
}

// VCVTQQ2PS performs "Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VCVTQQ2PS
// Supported forms : (7 forms)
//
// * VCVTQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ]
// * VCVTQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ]
// * VCVTQQ2PS zmm, ymm{k}{z} [AVX512DQ]
// * VCVTQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTQQ2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTQQ2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTQQ2PS takes 2 or 3 operands")
    }
    // VCVTQQ2PS m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTQQ2PS {er}, zmm, ymm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTQQ2PS zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTQQ2PS m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTQQ2PS m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTQQ2PS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTQQ2PS ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTQQ2PS")
    }
    return p
}

// VCVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer".
//
// Mnemonic : VCVTSD2SI
// Supported forms : (10 forms)
//
// * VCVTSD2SI xmm, r32 [AVX]
// * VCVTSD2SI m64, r32 [AVX]
// * VCVTSD2SI xmm, r64 [AVX]
// * VCVTSD2SI m64, r64 [AVX]
// * VCVTSD2SI m64, r32 [AVX512F]
// * VCVTSD2SI m64, r64 [AVX512F]
// * VCVTSD2SI {er}, xmm, r32 [AVX512F]
// * VCVTSD2SI {er}, xmm, r64 [AVX512F]
// * VCVTSD2SI xmm, r32 [AVX512F]
// * VCVTSD2SI xmm, r64 [AVX512F]
//
func (self *Program) VCVTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSD2SI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTSD2SI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTSD2SI takes 2 or 3 operands")
    }
    // VCVTSD2SI xmm, r32
    if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), v[0], 0)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSD2SI m64, r32
    if len(vv) == 0 && isM64(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTSD2SI xmm, r64
    if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfb)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSD2SI m64, r64
    if len(vv) == 0 && isM64(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTSD2SI m64, r32
    if len(vv) == 0 && isM64(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTSD2SI m64, r64
    if len(vv) == 0 && isM64(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTSD2SI {er}, xmm, r32
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit((vcode(v[0]) << 5) | 0x18)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSD2SI {er}, xmm, r64
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff)
            m.emit((vcode(v[0]) << 5) | 0x18)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSD2SI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit(0x48)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSD2SI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit(0x48)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSD2SI")
    }
    return p
}

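// Usage sketch for the scalar-to-GPR forms (illustrative; EAX/RAX are
// assumed to be this package's exported general-purpose registers):
//
//     p.VCVTSD2SI(XMM0, EAX)   // 32-bit result, W0 encoding (the 0x7f byte)
//     p.VCVTSD2SI(XMM0, RAX)   // 64-bit result, W1 encoding (the 0xff byte)
//
// The destination width alone decides between the W0 and W1 variants above.
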
// VCVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value".
//
// Mnemonic : VCVTSD2SS
// Supported forms : (5 forms)
//
// * VCVTSD2SS xmm, xmm, xmm [AVX]
// * VCVTSD2SS m64, xmm, xmm [AVX]
// * VCVTSD2SS m64, xmm, xmm{k}{z} [AVX512F]
// * VCVTSD2SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VCVTSD2SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VCVTSD2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSD2SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTSD2SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTSD2SS takes 3 or 4 operands")
    }
    // VCVTSD2SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSD2SS m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VCVTSD2SS m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VCVTSD2SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSD2SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSD2SS")
    }
    return p
}

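// Throughout these encoders kcode selects the opmask register (EVEX.aaa)
// and zcode sets the zeroing bit, so a destination carrying {k}{z} state
// merges or zeroes masked-off lanes exactly as the hardware defines; an
// unmasked register contributes kcode zero, i.e. k0 / no masking.
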
// VCVTSD2USI performs "Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer".
|
|
//
|
|
// Mnemonic : VCVTSD2USI
|
|
// Supported forms : (6 forms)
|
|
//
|
|
// * VCVTSD2USI m64, r32 [AVX512F]
|
|
// * VCVTSD2USI m64, r64 [AVX512F]
|
|
// * VCVTSD2USI {er}, xmm, r32 [AVX512F]
|
|
// * VCVTSD2USI {er}, xmm, r64 [AVX512F]
|
|
// * VCVTSD2USI xmm, r32 [AVX512F]
|
|
// * VCVTSD2USI xmm, r64 [AVX512F]
|
|
//
|
|
func (self *Program) VCVTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VCVTSD2USI", 2, Operands { v0, v1 })
|
|
case 1 : p = self.alloc("VCVTSD2USI", 3, Operands { v0, v1, vv[0] })
|
|
default : panic("instruction VCVTSD2USI takes 2 or 3 operands")
|
|
}
|
|
// VCVTSD2USI m64, r32
|
|
if len(vv) == 0 && isM64(v0) && isReg32(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
|
|
m.emit(0x79)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 8)
|
|
})
|
|
}
|
|
// VCVTSD2USI m64, r64
|
|
if len(vv) == 0 && isM64(v0) && isReg64(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
|
|
m.emit(0x79)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 8)
|
|
})
|
|
}
|
|
// VCVTSD2USI {er}, xmm, r32
|
|
if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7f)
|
|
m.emit((vcode(v[0]) << 5) | 0x18)
|
|
m.emit(0x79)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VCVTSD2USI {er}, xmm, r64
|
|
if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xff)
|
|
m.emit((vcode(v[0]) << 5) | 0x18)
|
|
m.emit(0x79)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VCVTSD2USI xmm, r32
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0x7f)
|
|
m.emit(0x48)
|
|
m.emit(0x79)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VCVTSD2USI xmm, r64
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0xff)
|
|
m.emit(0x48)
|
|
m.emit(0x79)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VCVTSD2USI")
|
|
}
|
|
return p
|
|
}
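
// vcvtsd2usiSketch is a hand-written usage sketch, not generated output: it
// assumes the register constants XMM1, EAX and RAX defined elsewhere in this
// package. The memory and {er} rounding forms are omitted because their
// operand constructors live outside this file.
func vcvtsd2usiSketch(p *Program) {
    p.VCVTSD2USI(XMM1, EAX) // xmm, r32 form: unsigned 32-bit result
    p.VCVTSD2USI(XMM1, RAX) // xmm, r64 form: unsigned 64-bit result
}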

// VCVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value".
//
// Mnemonic : VCVTSI2SD
// Supported forms : (9 forms)
//
// * VCVTSI2SD r32, xmm, xmm [AVX]
// * VCVTSI2SD r64, xmm, xmm [AVX]
// * VCVTSI2SD m32, xmm, xmm [AVX]
// * VCVTSI2SD m64, xmm, xmm [AVX]
// * VCVTSI2SD r32, xmm, xmm [AVX512F]
// * VCVTSI2SD m32, xmm, xmm [AVX512F]
// * VCVTSI2SD m64, xmm, xmm [AVX512F]
// * VCVTSI2SD {er}, r64, xmm, xmm [AVX512F]
// * VCVTSI2SD r64, xmm, xmm [AVX512F]
//
func (self *Program) VCVTSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSI2SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTSI2SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTSI2SD takes 3 or 4 operands")
    }
    // VCVTSI2SD r32, xmm, xmm
    if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSI2SD r64, xmm, xmm
    if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfb ^ (hlcode(v[1]) << 3))
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSI2SD m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VCVTSI2SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VCVTSI2SD r32, xmm, xmm
    if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSI2SD m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VCVTSI2SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VCVTSI2SD {er}, r64, xmm, xmm
    if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSI2SD r64, xmm, xmm
    if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSI2SD")
    }
    return p
}
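
// vcvtsi2sdSketch is a hand-written usage sketch, not generated output: it
// assumes the register constants EAX, RAX, XMM1 and XMM2 defined elsewhere
// in this package. The operand order is source first, destination last, and
// the optional {er} rounding operand (constructor outside this file) would
// come before all others, as the doc comment above indicates.
func vcvtsi2sdSketch(p *Program) {
    p.VCVTSI2SD(EAX, XMM2, XMM1) // convert EAX; upper lanes come from XMM2; result in XMM1
    p.VCVTSI2SD(RAX, XMM2, XMM1) // r64 source selects the 64-bit (W=1) encoding
}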

// VCVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value".
//
// Mnemonic : VCVTSI2SS
// Supported forms : (10 forms)
//
// * VCVTSI2SS r32, xmm, xmm [AVX]
// * VCVTSI2SS r64, xmm, xmm [AVX]
// * VCVTSI2SS m32, xmm, xmm [AVX]
// * VCVTSI2SS m64, xmm, xmm [AVX]
// * VCVTSI2SS m32, xmm, xmm [AVX512F]
// * VCVTSI2SS m64, xmm, xmm [AVX512F]
// * VCVTSI2SS {er}, r32, xmm, xmm [AVX512F]
// * VCVTSI2SS {er}, r64, xmm, xmm [AVX512F]
// * VCVTSI2SS r32, xmm, xmm [AVX512F]
// * VCVTSI2SS r64, xmm, xmm [AVX512F]
//
func (self *Program) VCVTSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSI2SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTSI2SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTSI2SS takes 3 or 4 operands")
    }
    // VCVTSI2SS r32, xmm, xmm
    if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSI2SS r64, xmm, xmm
    if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfa ^ (hlcode(v[1]) << 3))
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSI2SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VCVTSI2SS m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VCVTSI2SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VCVTSI2SS m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VCVTSI2SS {er}, r32, xmm, xmm
    if len(vv) == 1 && isER(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSI2SS {er}, r64, xmm, xmm
    if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfe ^ (hlcode(v[2]) << 3))
            m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSI2SS r32, xmm, xmm
    if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSI2SS r64, xmm, xmm
    if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSI2SS")
    }
    return p
}

// VCVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value".
//
// Mnemonic : VCVTSS2SD
// Supported forms : (5 forms)
//
// * VCVTSS2SD xmm, xmm, xmm [AVX]
// * VCVTSS2SD m32, xmm, xmm [AVX]
// * VCVTSS2SD m32, xmm, xmm{k}{z} [AVX512F]
// * VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VCVTSS2SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VCVTSS2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSS2SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTSS2SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTSS2SD takes 3 or 4 operands")
    }
    // VCVTSS2SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSS2SD m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VCVTSS2SD m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5a)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSS2SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSS2SD")
    }
    return p
}
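
// vcvtss2sdSketch is a hand-written usage sketch, not generated output: it
// assumes the register constants XMM0, XMM1 and XMM2 defined elsewhere in
// this package. The masked xmm{k}{z} and {sae} forms listed in the doc
// comment above are omitted because their operand wrappers are defined
// outside this file.
func vcvtss2sdSketch(p *Program) {
    // Widen the low float32 of XMM0 to a float64 in XMM2's low quadword;
    // XMM2's upper quadword is copied from XMM1.
    p.VCVTSS2SD(XMM0, XMM1, XMM2)
}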

// VCVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer".
//
// Mnemonic : VCVTSS2SI
// Supported forms : (10 forms)
//
// * VCVTSS2SI xmm, r32 [AVX]
// * VCVTSS2SI m32, r32 [AVX]
// * VCVTSS2SI xmm, r64 [AVX]
// * VCVTSS2SI m32, r64 [AVX]
// * VCVTSS2SI m32, r32 [AVX512F]
// * VCVTSS2SI m32, r64 [AVX512F]
// * VCVTSS2SI {er}, xmm, r32 [AVX512F]
// * VCVTSS2SI {er}, xmm, r64 [AVX512F]
// * VCVTSS2SI xmm, r32 [AVX512F]
// * VCVTSS2SI xmm, r64 [AVX512F]
//
func (self *Program) VCVTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSS2SI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTSS2SI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTSS2SI takes 2 or 3 operands")
    }
    // VCVTSS2SI xmm, r32
    if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSS2SI m32, r32
    if len(vv) == 0 && isM32(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTSS2SI xmm, r64
    if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfa)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSS2SI m32, r64
    if len(vv) == 0 && isM32(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTSS2SI m32, r32
    if len(vv) == 0 && isM32(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTSS2SI m32, r64
    if len(vv) == 0 && isM32(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTSS2SI {er}, xmm, r32
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit((vcode(v[0]) << 5) | 0x18)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSS2SI {er}, xmm, r64
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe)
            m.emit((vcode(v[0]) << 5) | 0x18)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSS2SI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSS2SI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSS2SI")
    }
    return p
}

// VCVTSS2USI performs "Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer".
//
// Mnemonic : VCVTSS2USI
// Supported forms : (6 forms)
//
// * VCVTSS2USI m32, r32 [AVX512F]
// * VCVTSS2USI m32, r64 [AVX512F]
// * VCVTSS2USI {er}, xmm, r32 [AVX512F]
// * VCVTSS2USI {er}, xmm, r64 [AVX512F]
// * VCVTSS2USI xmm, r32 [AVX512F]
// * VCVTSS2USI xmm, r64 [AVX512F]
//
func (self *Program) VCVTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTSS2USI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTSS2USI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTSS2USI takes 2 or 3 operands")
    }
    // VCVTSS2USI m32, r32
    if len(vv) == 0 && isM32(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTSS2USI m32, r64
    if len(vv) == 0 && isM32(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTSS2USI {er}, xmm, r32
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit((vcode(v[0]) << 5) | 0x18)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSS2USI {er}, xmm, r64
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe)
            m.emit((vcode(v[0]) << 5) | 0x18)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTSS2USI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTSS2USI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTSS2USI")
    }
    return p
}

// VCVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : VCVTTPD2DQ
// Supported forms : (11 forms)
//
// * VCVTTPD2DQ xmm, xmm [AVX]
// * VCVTTPD2DQ ymm, xmm [AVX]
// * VCVTTPD2DQ m128, xmm [AVX]
// * VCVTTPD2DQ m256, xmm [AVX]
// * VCVTTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F]
// * VCVTTPD2DQ {sae}, zmm, ymm{k}{z} [AVX512F]
// * VCVTTPD2DQ zmm, ymm{k}{z} [AVX512F]
// * VCVTTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPD2DQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPD2DQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPD2DQ takes 2 or 3 operands")
    }
    // VCVTTPD2DQ xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2DQ ymm, xmm
    if len(vv) == 0 && isYMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2DQ m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTPD2DQ m256, xmm
    if len(vv) == 0 && isM256(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTPD2DQ m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTTPD2DQ {sae}, zmm, ymm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPD2DQ zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2DQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPD2DQ m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xe6)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPD2DQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2DQ ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0xe6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPD2DQ")
    }
    return p
}
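
// vcvttpd2dqSketch is a hand-written usage sketch, not generated output: it
// assumes the register constants XMM1 and YMM2 defined elsewhere in this
// package. "Convert with truncation" rounds toward zero regardless of the
// MXCSR rounding mode, and the packed-double source always narrows: each
// float64 lane becomes an int32 lane in a destination half the width.
func vcvttpd2dqSketch(p *Program) {
    p.VCVTTPD2DQ(XMM1, XMM1) // 2 lanes: two int32 land in the low half of XMM1
    p.VCVTTPD2DQ(YMM2, XMM1) // 4 lanes: ymm source narrows into a full xmm
}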

// VCVTTPD2QQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers".
//
// Mnemonic : VCVTTPD2QQ
// Supported forms : (7 forms)
//
// * VCVTTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VCVTTPD2QQ {sae}, zmm, zmm{k}{z} [AVX512DQ]
// * VCVTTPD2QQ zmm, zmm{k}{z} [AVX512DQ]
// * VCVTTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPD2QQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPD2QQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPD2QQ takes 2 or 3 operands")
    }
    // VCVTTPD2QQ m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTTPD2QQ {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPD2QQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2QQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPD2QQ m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPD2QQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2QQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPD2QQ")
    }
    return p
}

// VCVTTPD2UDQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers".
//
// Mnemonic : VCVTTPD2UDQ
// Supported forms : (7 forms)
//
// * VCVTTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F]
// * VCVTTPD2UDQ {sae}, zmm, ymm{k}{z} [AVX512F]
// * VCVTTPD2UDQ zmm, ymm{k}{z} [AVX512F]
// * VCVTTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPD2UDQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPD2UDQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPD2UDQ takes 2 or 3 operands")
    }
    // VCVTTPD2UDQ m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTTPD2UDQ {sae}, zmm, ymm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPD2UDQ zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2UDQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPD2UDQ m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPD2UDQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2UDQ ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfc)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPD2UDQ")
    }
    return p
}

// VCVTTPD2UQQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers".
//
// Mnemonic : VCVTTPD2UQQ
// Supported forms : (7 forms)
//
// * VCVTTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VCVTTPD2UQQ {sae}, zmm, zmm{k}{z} [AVX512DQ]
// * VCVTTPD2UQQ zmm, zmm{k}{z} [AVX512DQ]
// * VCVTTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPD2UQQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPD2UQQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPD2UQQ takes 2 or 3 operands")
    }
    // VCVTTPD2UQQ m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTTPD2UQQ {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPD2UQQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2UQQ m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPD2UQQ m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPD2UQQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPD2UQQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPD2UQQ")
    }
    return p
}

// VCVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers".
//
// Mnemonic : VCVTTPS2DQ
// Supported forms : (11 forms)
//
// * VCVTTPS2DQ xmm, xmm [AVX]
// * VCVTTPS2DQ m128, xmm [AVX]
// * VCVTTPS2DQ ymm, ymm [AVX]
// * VCVTTPS2DQ m256, ymm [AVX]
// * VCVTTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTTPS2DQ {sae}, zmm, zmm{k}{z} [AVX512F]
// * VCVTTPS2DQ zmm, zmm{k}{z} [AVX512F]
// * VCVTTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPS2DQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPS2DQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPS2DQ takes 2 or 3 operands")
    }
    // VCVTTPS2DQ xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2DQ m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTPS2DQ ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), v[0], 0)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2DQ m256, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTPS2DQ m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTTPS2DQ {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPS2DQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2DQ m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPS2DQ m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x5b)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPS2DQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2DQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x5b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPS2DQ")
    }
    return p
}
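
// vcvttps2dqSketch is a hand-written usage sketch, not generated output: it
// assumes the register constants XMM3 and YMM4 defined elsewhere in this
// package. Unlike the packed-double variants above, single-precision sources
// keep their width: each float32 lane becomes an int32 lane in a destination
// of the same size.
func vcvttps2dqSketch(p *Program) {
    p.VCVTTPS2DQ(XMM3, XMM3) // 4 lanes, converted in place
    p.VCVTTPS2DQ(YMM4, YMM4) // 8 lanes, converted in place
}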

// VCVTTPS2QQ performs "Convert with Truncation Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values".
//
// Mnemonic : VCVTTPS2QQ
// Supported forms : (7 forms)
//
// * VCVTTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
// * VCVTTPS2QQ {sae}, ymm, zmm{k}{z} [AVX512DQ]
// * VCVTTPS2QQ ymm, zmm{k}{z} [AVX512DQ]
// * VCVTTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPS2QQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPS2QQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPS2QQ takes 2 or 3 operands")
    }
    // VCVTTPS2QQ m256/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPS2QQ {sae}, ymm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPS2QQ ymm, zmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2QQ m64/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTTPS2QQ m128/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPS2QQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2QQ xmm, ymm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPS2QQ")
    }
    return p
}

// VCVTTPS2UDQ performs "Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values".
//
// Mnemonic : VCVTTPS2UDQ
// Supported forms : (7 forms)
//
// * VCVTTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTTPS2UDQ {sae}, zmm, zmm{k}{z} [AVX512F]
// * VCVTTPS2UDQ zmm, zmm{k}{z} [AVX512F]
// * VCVTTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPS2UDQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPS2UDQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPS2UDQ takes 2 or 3 operands")
    }
    // VCVTTPS2UDQ m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTTPS2UDQ {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPS2UDQ zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2UDQ m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPS2UDQ m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPS2UDQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2UDQ ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPS2UDQ")
    }
    return p
}

// VCVTTPS2UQQ performs "Convert with Truncation Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values".
//
// Mnemonic : VCVTTPS2UQQ
// Supported forms : (7 forms)
//
// * VCVTTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
// * VCVTTPS2UQQ {sae}, ymm, zmm{k}{z} [AVX512DQ]
// * VCVTTPS2UQQ ymm, zmm{k}{z} [AVX512DQ]
// * VCVTTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTPS2UQQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTPS2UQQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTPS2UQQ takes 2 or 3 operands")
    }
    // VCVTTPS2UQQ m256/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTTPS2UQQ {sae}, ymm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTPS2UQQ ymm, zmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2UQQ m64/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTTPS2UQQ m128/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTTPS2UQQ xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTPS2UQQ xmm, ymm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTPS2UQQ")
    }
    return p
}
|
|
|
|
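// A hypothetical calling sketch for VCVTTPS2UQQ above (illustrative only;
// the XMM/YMM/ZMM register constants are assumed to come from this
// package's register definitions, which sit outside this section):
//
//    p.VCVTTPS2UQQ(XMM1, XMM2)   // VCVTTPS2UQQ xmm, xmm{k}{z}
//    p.VCVTTPS2UQQ(YMM3, ZMM4)   // VCVTTPS2UQQ ymm, zmm{k}{z}
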
// VCVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer".
//
// Mnemonic : VCVTTSD2SI
// Supported forms : (10 forms)
//
// * VCVTTSD2SI xmm, r32 [AVX]
// * VCVTTSD2SI m64, r32 [AVX]
// * VCVTTSD2SI xmm, r64 [AVX]
// * VCVTTSD2SI m64, r64 [AVX]
// * VCVTTSD2SI m64, r32 [AVX512F]
// * VCVTTSD2SI m64, r64 [AVX512F]
// * VCVTTSD2SI {sae}, xmm, r32 [AVX512F]
// * VCVTTSD2SI {sae}, xmm, r64 [AVX512F]
// * VCVTTSD2SI xmm, r32 [AVX512F]
// * VCVTTSD2SI xmm, r64 [AVX512F]
//
func (self *Program) VCVTTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTSD2SI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTSD2SI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTSD2SI takes 2 or 3 operands")
    }
    // VCVTTSD2SI xmm, r32
    if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), v[0], 0)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSD2SI m64, r32
    if len(vv) == 0 && isM64(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTSD2SI xmm, r64
    if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfb)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSD2SI m64, r64
    if len(vv) == 0 && isM64(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTSD2SI m64, r32
    if len(vv) == 0 && isM64(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTTSD2SI m64, r64
    if len(vv) == 0 && isM64(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTTSD2SI {sae}, xmm, r32
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit(0x18)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSD2SI {sae}, xmm, r64
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff)
            m.emit(0x18)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSD2SI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit(0x48)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSD2SI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit(0x48)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTSD2SI")
    }
    return p
}

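// A hypothetical calling sketch for VCVTTSD2SI above (illustrative only;
// the EAX/RAX/XMM register constants and the Ptr memory-operand helper
// are assumed from this package's operand definitions, not shown here):
//
//    p.VCVTTSD2SI(XMM0, EAX)         // VCVTTSD2SI xmm, r32
//    p.VCVTTSD2SI(Ptr(RDI, 0), RAX)  // VCVTTSD2SI m64, r64
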
// VCVTTSD2USI performs "Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer".
//
// Mnemonic : VCVTTSD2USI
// Supported forms : (6 forms)
//
// * VCVTTSD2USI m64, r32 [AVX512F]
// * VCVTTSD2USI m64, r64 [AVX512F]
// * VCVTTSD2USI {sae}, xmm, r32 [AVX512F]
// * VCVTTSD2USI {sae}, xmm, r64 [AVX512F]
// * VCVTTSD2USI xmm, r32 [AVX512F]
// * VCVTTSD2USI xmm, r64 [AVX512F]
//
func (self *Program) VCVTTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTSD2USI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTSD2USI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTSD2USI takes 2 or 3 operands")
    }
    // VCVTTSD2USI m64, r32
    if len(vv) == 0 && isM64(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTTSD2USI m64, r64
    if len(vv) == 0 && isM64(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTTSD2USI {sae}, xmm, r32
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit(0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSD2USI {sae}, xmm, r64
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff)
            m.emit(0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSD2USI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit(0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSD2USI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit(0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTSD2USI")
    }
    return p
}

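// A hypothetical calling sketch for VCVTTSD2USI above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTTSD2USI(XMM5, RCX)   // VCVTTSD2USI xmm, r64
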
// VCVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer".
//
// Mnemonic : VCVTTSS2SI
// Supported forms : (10 forms)
//
// * VCVTTSS2SI xmm, r32 [AVX]
// * VCVTTSS2SI m32, r32 [AVX]
// * VCVTTSS2SI xmm, r64 [AVX]
// * VCVTTSS2SI m32, r64 [AVX]
// * VCVTTSS2SI m32, r32 [AVX512F]
// * VCVTTSS2SI m32, r64 [AVX512F]
// * VCVTTSS2SI {sae}, xmm, r32 [AVX512F]
// * VCVTTSS2SI {sae}, xmm, r64 [AVX512F]
// * VCVTTSS2SI xmm, r32 [AVX512F]
// * VCVTTSS2SI xmm, r64 [AVX512F]
//
func (self *Program) VCVTTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTSS2SI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTSS2SI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTSS2SI takes 2 or 3 operands")
    }
    // VCVTTSS2SI xmm, r32
    if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSS2SI m32, r32
    if len(vv) == 0 && isM32(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTSS2SI xmm, r64
    if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfa)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSS2SI m32, r64
    if len(vv) == 0 && isM32(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VCVTTSS2SI m32, r32
    if len(vv) == 0 && isM32(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTTSS2SI m32, r64
    if len(vv) == 0 && isM32(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2c)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTTSS2SI {sae}, xmm, r32
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit(0x18)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSS2SI {sae}, xmm, r64
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe)
            m.emit(0x18)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSS2SI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSS2SI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTSS2SI")
    }
    return p
}

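// A hypothetical calling sketch for VCVTTSS2SI above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTTSS2SI(XMM0, EAX)   // VCVTTSS2SI xmm, r32
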
// VCVTTSS2USI performs "Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer".
//
// Mnemonic : VCVTTSS2USI
// Supported forms : (6 forms)
//
// * VCVTTSS2USI m32, r32 [AVX512F]
// * VCVTTSS2USI m32, r64 [AVX512F]
// * VCVTTSS2USI {sae}, xmm, r32 [AVX512F]
// * VCVTTSS2USI {sae}, xmm, r64 [AVX512F]
// * VCVTTSS2USI xmm, r32 [AVX512F]
// * VCVTTSS2USI xmm, r64 [AVX512F]
//
func (self *Program) VCVTTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTTSS2USI", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTTSS2USI", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTTSS2USI takes 2 or 3 operands")
    }
    // VCVTTSS2USI m32, r32
    if len(vv) == 0 && isM32(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTTSS2USI m32, r64
    if len(vv) == 0 && isM32(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VCVTTSS2USI {sae}, xmm, r32
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit(0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSS2USI {sae}, xmm, r64
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe)
            m.emit(0x18)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTTSS2USI xmm, r32
    if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTTSS2USI xmm, r64
    if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTTSS2USI")
    }
    return p
}

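// A hypothetical calling sketch for VCVTTSS2USI above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTTSS2USI(XMM2, EDX)   // VCVTTSS2USI xmm, r32
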
// VCVTUDQ2PD performs "Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VCVTUDQ2PD
// Supported forms : (6 forms)
//
// * VCVTUDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTUDQ2PD ymm, zmm{k}{z} [AVX512F]
// * VCVTUDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTUDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTUDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTUDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTUDQ2PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VCVTUDQ2PD", 2, Operands { v0, v1 })
    // VCVTUDQ2PD m256/m32bcst, zmm{k}{z}
    if isM256M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTUDQ2PD ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUDQ2PD m64/m32bcst, xmm{k}{z}
    if isM64M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VCVTUDQ2PD m128/m32bcst, ymm{k}{z}
    if isM128M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTUDQ2PD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUDQ2PD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTUDQ2PD")
    }
    return p
}

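// A hypothetical calling sketch for VCVTUDQ2PD above (a widening form:
// the xmm source is converted into a ymm destination; register constants
// are assumed from this package's definitions):
//
//    p.VCVTUDQ2PD(XMM0, YMM1)   // VCVTUDQ2PD xmm, ymm{k}{z}
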
// VCVTUDQ2PS performs "Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VCVTUDQ2PS
// Supported forms : (7 forms)
//
// * VCVTUDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F]
// * VCVTUDQ2PS {er}, zmm, zmm{k}{z} [AVX512F]
// * VCVTUDQ2PS zmm, zmm{k}{z} [AVX512F]
// * VCVTUDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTUDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VCVTUDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VCVTUDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VCVTUDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTUDQ2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTUDQ2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTUDQ2PS takes 2 or 3 operands")
    }
    // VCVTUDQ2PS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTUDQ2PS {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTUDQ2PS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUDQ2PS m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTUDQ2PS m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTUDQ2PS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUDQ2PS ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTUDQ2PS")
    }
    return p
}

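// A hypothetical calling sketch for VCVTUDQ2PS above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTUDQ2PS(ZMM1, ZMM2)   // VCVTUDQ2PS zmm, zmm{k}{z}
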
// VCVTUQQ2PD performs "Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VCVTUQQ2PD
// Supported forms : (7 forms)
//
// * VCVTUQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VCVTUQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ]
// * VCVTUQQ2PD zmm, zmm{k}{z} [AVX512DQ]
// * VCVTUQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTUQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTUQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTUQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTUQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTUQQ2PD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTUQQ2PD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTUQQ2PD takes 2 or 3 operands")
    }
    // VCVTUQQ2PD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTUQQ2PD {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTUQQ2PD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUQQ2PD m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTUQQ2PD m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTUQQ2PD xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUQQ2PD ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTUQQ2PD")
    }
    return p
}

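// A hypothetical calling sketch for VCVTUQQ2PD above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTUQQ2PD(YMM0, YMM1)   // VCVTUQQ2PD ymm, ymm{k}{z}
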
// VCVTUQQ2PS performs "Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VCVTUQQ2PS
// Supported forms : (7 forms)
//
// * VCVTUQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ]
// * VCVTUQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ]
// * VCVTUQQ2PS zmm, ymm{k}{z} [AVX512DQ]
// * VCVTUQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTUQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTUQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VCVTUQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VCVTUQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTUQQ2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VCVTUQQ2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VCVTUQQ2PS takes 2 or 3 operands")
    }
    // VCVTUQQ2PS m512/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VCVTUQQ2PS {er}, zmm, ymm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VCVTUQQ2PS zmm, ymm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUQQ2PS m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VCVTUQQ2PS m256/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x7a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VCVTUQQ2PS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUQQ2PS ymm, xmm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTUQQ2PS")
    }
    return p
}

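// A hypothetical calling sketch for VCVTUQQ2PS above (a narrowing form:
// the zmm source is converted into a ymm destination; register constants
// are assumed from this package's definitions):
//
//    p.VCVTUQQ2PS(ZMM0, YMM1)   // VCVTUQQ2PS zmm, ymm{k}{z}
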
// VCVTUSI2SD performs "Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VCVTUSI2SD
// Supported forms : (5 forms)
//
// * VCVTUSI2SD r32, xmm, xmm [AVX512F]
// * VCVTUSI2SD m32, xmm, xmm [AVX512F]
// * VCVTUSI2SD m64, xmm, xmm [AVX512F]
// * VCVTUSI2SD {er}, r64, xmm, xmm [AVX512F]
// * VCVTUSI2SD r64, xmm, xmm [AVX512F]
//
func (self *Program) VCVTUSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTUSI2SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTUSI2SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTUSI2SD takes 3 or 4 operands")
    }
    // VCVTUSI2SD r32, xmm, xmm
    if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUSI2SD m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x7b)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VCVTUSI2SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x7b)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VCVTUSI2SD {er}, r64, xmm, xmm
    if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTUSI2SD r64, xmm, xmm
    if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTUSI2SD")
    }
    return p
}

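// A hypothetical calling sketch for VCVTUSI2SD above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTUSI2SD(ECX, XMM1, XMM2)   // VCVTUSI2SD r32, xmm, xmm
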
// VCVTUSI2SS performs "Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VCVTUSI2SS
// Supported forms : (6 forms)
//
// * VCVTUSI2SS m32, xmm, xmm [AVX512F]
// * VCVTUSI2SS m64, xmm, xmm [AVX512F]
// * VCVTUSI2SS {er}, r32, xmm, xmm [AVX512F]
// * VCVTUSI2SS {er}, r64, xmm, xmm [AVX512F]
// * VCVTUSI2SS r32, xmm, xmm [AVX512F]
// * VCVTUSI2SS r64, xmm, xmm [AVX512F]
//
func (self *Program) VCVTUSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VCVTUSI2SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VCVTUSI2SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VCVTUSI2SS takes 3 or 4 operands")
    }
    // VCVTUSI2SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x7b)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VCVTUSI2SS m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x7b)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VCVTUSI2SS {er}, r32, xmm, xmm
    if len(vv) == 1 && isER(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTUSI2SS {er}, r64, xmm, xmm
    if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfe ^ (hlcode(v[2]) << 3))
            m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VCVTUSI2SS r32, xmm, xmm
    if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VCVTUSI2SS r64, xmm, xmm
    if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VCVTUSI2SS")
    }
    return p
}

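// A hypothetical calling sketch for VCVTUSI2SS above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VCVTUSI2SS(RAX, XMM1, XMM2)   // VCVTUSI2SS r64, xmm, xmm
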
// VDBPSADBW performs "Double Block Packed Sum-Absolute-Differences on Unsigned Bytes".
//
// Mnemonic : VDBPSADBW
// Supported forms : (6 forms)
//
// * VDBPSADBW imm8, zmm, zmm, zmm{k}{z} [AVX512BW]
// * VDBPSADBW imm8, m512, zmm, zmm{k}{z} [AVX512BW]
// * VDBPSADBW imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VDBPSADBW imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VDBPSADBW imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VDBPSADBW imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VDBPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VDBPSADBW", 4, Operands { v0, v1, v2, v3 })
    // VDBPSADBW imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDBPSADBW imm8, m512, zmm, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x42)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDBPSADBW imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDBPSADBW imm8, m128, xmm, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x42)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDBPSADBW imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDBPSADBW imm8, m256, ymm, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x42)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDBPSADBW")
    }
    return p
}

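// A hypothetical calling sketch for VDBPSADBW above (illustrative only;
// the immediate selects the byte-shuffle pattern and register constants
// are assumed from this package's definitions):
//
//    p.VDBPSADBW(4, XMM1, XMM2, XMM3)   // VDBPSADBW imm8, xmm, xmm, xmm{k}{z}
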
// VDIVPD performs "Divide Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VDIVPD
// Supported forms : (11 forms)
//
// * VDIVPD xmm, xmm, xmm [AVX]
// * VDIVPD m128, xmm, xmm [AVX]
// * VDIVPD ymm, ymm, ymm [AVX]
// * VDIVPD m256, ymm, ymm [AVX]
// * VDIVPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VDIVPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VDIVPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VDIVPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VDIVPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VDIVPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VDIVPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VDIVPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VDIVPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VDIVPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VDIVPD takes 3 or 4 operands")
    }
    // VDIVPD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VDIVPD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VDIVPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VDIVPD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VDIVPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VDIVPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VDIVPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDIVPD")
    }
    return p
}

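// A hypothetical calling sketch for VDIVPD above (illustrative only; note
// the reversed, AT&T-style operand order with the destination last; the
// register constants and the Ptr memory helper are assumed from this
// package's operand definitions):
//
//    p.VDIVPD(XMM0, XMM1, XMM2)         // VDIVPD xmm, xmm, xmm
//    p.VDIVPD(Ptr(RAX, 0), XMM1, XMM2)  // VDIVPD m128, xmm, xmm
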
// VDIVPS performs "Divide Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VDIVPS
// Supported forms : (11 forms)
//
// * VDIVPS xmm, xmm, xmm [AVX]
// * VDIVPS m128, xmm, xmm [AVX]
// * VDIVPS ymm, ymm, ymm [AVX]
// * VDIVPS m256, ymm, ymm [AVX]
// * VDIVPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VDIVPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VDIVPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VDIVPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VDIVPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VDIVPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VDIVPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VDIVPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VDIVPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VDIVPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VDIVPS takes 3 or 4 operands")
    }
    // VDIVPS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VDIVPS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VDIVPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VDIVPS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VDIVPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VDIVPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VDIVPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDIVPS")
    }
    return p
}

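// A hypothetical calling sketch for VDIVPS above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VDIVPS(YMM0, YMM1, YMM2)   // VDIVPS ymm, ymm, ymm
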
// VDIVSD performs "Divide Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VDIVSD
// Supported forms : (5 forms)
//
// * VDIVSD xmm, xmm, xmm [AVX]
// * VDIVSD m64, xmm, xmm [AVX]
// * VDIVSD m64, xmm, xmm{k}{z} [AVX512F]
// * VDIVSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VDIVSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VDIVSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VDIVSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VDIVSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VDIVSD takes 3 or 4 operands")
    }
    // VDIVSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VDIVSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VDIVSD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VDIVSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDIVSD")
    }
    return p
}

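// A hypothetical calling sketch for VDIVSD above (illustrative only;
// register constants are assumed from this package's definitions):
//
//    p.VDIVSD(XMM0, XMM1, XMM2)   // VDIVSD xmm, xmm, xmm
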
// VDIVSS performs "Divide Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VDIVSS
// Supported forms : (5 forms)
//
// * VDIVSS xmm, xmm, xmm [AVX]
// * VDIVSS m32, xmm, xmm [AVX]
// * VDIVSS m32, xmm, xmm{k}{z} [AVX512F]
// * VDIVSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VDIVSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VDIVSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VDIVSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VDIVSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VDIVSS takes 3 or 4 operands")
    }
    // VDIVSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VDIVSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VDIVSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5e)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VDIVSS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VDIVSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDIVSS")
    }
    return p
}

// VDPPD performs "Dot Product of Packed Double Precision Floating-Point Values".
//
// Mnemonic : VDPPD
// Supported forms : (2 forms)
//
// * VDPPD imm8, xmm, xmm, xmm [AVX]
// * VDPPD imm8, m128, xmm, xmm [AVX]
//
func (self *Program) VDPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VDPPD", 4, Operands { v0, v1, v2, v3 })
    // VDPPD imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDPPD imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x41)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDPPD")
    }
    return p
}

// VDPPS performs "Dot Product of Packed Single Precision Floating-Point Values".
//
// Mnemonic : VDPPS
// Supported forms : (4 forms)
//
// * VDPPS imm8, xmm, xmm, xmm [AVX]
// * VDPPS imm8, m128, xmm, xmm [AVX]
// * VDPPS imm8, ymm, ymm, ymm [AVX]
// * VDPPS imm8, m256, ymm, ymm [AVX]
//
func (self *Program) VDPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VDPPS", 4, Operands { v0, v1, v2, v3 })
    // VDPPS imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDPPS imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x40)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDPPS imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VDPPS imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x40)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VDPPS")
    }
    return p
}

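// Note on the imm8 operand above (illustrative, not generated): for DPPS the
// high nibble of the immediate selects which source elements enter the dot
// product and the low nibble selects which destination lanes receive the sum,
// with unselected lanes zeroed. Assuming this package's exported XMM register
// values, a sketch:
//
//     p.VDPPS(0xF1, XMM1, XMM2, XMM3)    // sum all 4 products of xmm2*xmm1, write lane 0 of xmm3
//
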
// VEXP2PD performs "Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error".
//
// Mnemonic : VEXP2PD
// Supported forms : (3 forms)
//
// * VEXP2PD m512/m64bcst, zmm{k}{z} [AVX512ER]
// * VEXP2PD {sae}, zmm, zmm{k}{z} [AVX512ER]
// * VEXP2PD zmm, zmm{k}{z} [AVX512ER]
//
func (self *Program) VEXP2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VEXP2PD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VEXP2PD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VEXP2PD takes 2 or 3 operands")
    }
    // VEXP2PD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc8)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VEXP2PD {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xc8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VEXP2PD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xc8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXP2PD")
    }
    return p
}

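// Illustrative only (not generated): as with the other variadic wrappers in
// this file, VEXP2PD takes the {sae} token as an extra leading operand, which
// shifts the register operands right by one. A sketch, assuming SAE is this
// package's exported suppress-all-exceptions operand and ZMM1/ZMM2 are its
// exported register values:
//
//     p.VEXP2PD(ZMM1, ZMM2)         // 2 operands: zmm2 := 2^zmm1 (approx.)
//     p.VEXP2PD(SAE, ZMM1, ZMM2)    // 3 operands: same, exceptions suppressed
//
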
// VEXP2PS performs "Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error".
//
// Mnemonic : VEXP2PS
// Supported forms : (3 forms)
//
// * VEXP2PS m512/m32bcst, zmm{k}{z} [AVX512ER]
// * VEXP2PS {sae}, zmm, zmm{k}{z} [AVX512ER]
// * VEXP2PS zmm, zmm{k}{z} [AVX512ER]
//
func (self *Program) VEXP2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VEXP2PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VEXP2PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VEXP2PS takes 2 or 3 operands")
    }
    // VEXP2PS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc8)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VEXP2PS {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xc8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VEXP2PS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xc8)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXP2PS")
    }
    return p
}

// VEXPANDPD performs "Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory".
//
// Mnemonic : VEXPANDPD
// Supported forms : (6 forms)
//
// * VEXPANDPD zmm, zmm{k}{z} [AVX512F]
// * VEXPANDPD m512, zmm{k}{z} [AVX512F]
// * VEXPANDPD xmm, xmm{k}{z} [AVX512VL]
// * VEXPANDPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VEXPANDPD m128, xmm{k}{z} [AVX512VL]
// * VEXPANDPD m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VEXPANDPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VEXPANDPD", 2, Operands { v0, v1 })
    // VEXPANDPD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VEXPANDPD m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x88)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VEXPANDPD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VEXPANDPD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VEXPANDPD m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x88)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VEXPANDPD m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x88)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXPANDPD")
    }
    return p
}

// VEXPANDPS performs "Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory".
//
// Mnemonic : VEXPANDPS
// Supported forms : (6 forms)
//
// * VEXPANDPS zmm, zmm{k}{z} [AVX512F]
// * VEXPANDPS m512, zmm{k}{z} [AVX512F]
// * VEXPANDPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VEXPANDPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VEXPANDPS m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VEXPANDPS m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VEXPANDPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VEXPANDPS", 2, Operands { v0, v1 })
    // VEXPANDPS zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VEXPANDPS m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x88)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VEXPANDPS xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VEXPANDPS ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x88)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VEXPANDPS m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x88)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VEXPANDPS m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x88)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXPANDPS")
    }
    return p
}

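// Illustrative only (not generated): VEXPANDPD/VEXPANDPS read densely packed
// elements from the source and scatter them into the destination lanes whose
// mask bits are set, which is why the memory forms above use an element-sized
// mrsd() scale (8 or 4) rather than the full vector width used by most other
// 512-bit forms in this file. A masked call would wrap the destination with
// this package's {k}{z} operand helpers (API assumed, not shown here); with
// no mask the register form degenerates to a plain copy:
//
//     p.VEXPANDPS(ZMM1, ZMM2)    // all lanes selected: zmm2 := zmm1
//
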
// VEXTRACTF128 performs "Extract Packed Floating-Point Values".
//
// Mnemonic : VEXTRACTF128
// Supported forms : (2 forms)
//
// * VEXTRACTF128 imm8, ymm, xmm [AVX]
// * VEXTRACTF128 imm8, ymm, m128 [AVX]
//
func (self *Program) VEXTRACTF128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTF128", 3, Operands { v0, v1, v2 })
    // VEXTRACTF128 imm8, ymm, xmm
    if isImm8(v0) && isYMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x7d)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF128 imm8, ymm, m128
    if isImm8(v0) && isYMM(v1) && isM128(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTF128")
    }
    return p
}

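// Illustrative only (not generated): for VEXTRACTF128 (and the VEXTRACTI128
// integer twin further down) only bit 0 of the imm8 matters, selecting the
// lower (0) or upper (1) 128-bit lane of the ymm source. Assuming this
// package's exported register values:
//
//     p.VEXTRACTF128(1, YMM1, XMM2)    // xmm2 := upper 128 bits of ymm1
//
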
// VEXTRACTF32X4 performs "Extract 128 Bits of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VEXTRACTF32X4
// Supported forms : (4 forms)
//
// * VEXTRACTF32X4 imm8, zmm, xmm{k}{z} [AVX512F]
// * VEXTRACTF32X4 imm8, zmm, m128{k}{z} [AVX512F]
// * VEXTRACTF32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VEXTRACTF32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VEXTRACTF32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTF32X4", 3, Operands { v0, v1, v2 })
    // VEXTRACTF32X4 imm8, zmm, xmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF32X4 imm8, zmm, m128{k}{z}
    if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF32X4 imm8, ymm, xmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF32X4 imm8, ymm, m128{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTF32X4")
    }
    return p
}

// VEXTRACTF32X8 performs "Extract 256 Bits of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VEXTRACTF32X8
// Supported forms : (2 forms)
//
// * VEXTRACTF32X8 imm8, zmm, ymm{k}{z} [AVX512DQ]
// * VEXTRACTF32X8 imm8, zmm, m256{k}{z} [AVX512DQ]
//
func (self *Program) VEXTRACTF32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTF32X8", 3, Operands { v0, v1, v2 })
    // VEXTRACTF32X8 imm8, zmm, ymm{k}{z}
    if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x1b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF32X8 imm8, zmm, m256{k}{z}
    if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[2]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTF32X8")
    }
    return p
}

// VEXTRACTF64X2 performs "Extract 128 Bits of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VEXTRACTF64X2
// Supported forms : (4 forms)
//
// * VEXTRACTF64X2 imm8, zmm, xmm{k}{z} [AVX512DQ]
// * VEXTRACTF64X2 imm8, zmm, m128{k}{z} [AVX512DQ]
// * VEXTRACTF64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VEXTRACTF64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VEXTRACTF64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTF64X2", 3, Operands { v0, v1, v2 })
    // VEXTRACTF64X2 imm8, zmm, xmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF64X2 imm8, zmm, m128{k}{z}
    if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF64X2 imm8, ymm, xmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x19)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF64X2 imm8, ymm, m128{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x19)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTF64X2")
    }
    return p
}

// VEXTRACTF64X4 performs "Extract 256 Bits of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VEXTRACTF64X4
// Supported forms : (2 forms)
//
// * VEXTRACTF64X4 imm8, zmm, ymm{k}{z} [AVX512F]
// * VEXTRACTF64X4 imm8, zmm, m256{k}{z} [AVX512F]
//
func (self *Program) VEXTRACTF64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTF64X4", 3, Operands { v0, v1, v2 })
    // VEXTRACTF64X4 imm8, zmm, ymm{k}{z}
    if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x1b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTF64X4 imm8, zmm, m256{k}{z}
    if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x1b)
            m.mrsd(lcode(v[1]), addr(v[2]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTF64X4")
    }
    return p
}

// VEXTRACTI128 performs "Extract Packed Integer Values".
//
// Mnemonic : VEXTRACTI128
// Supported forms : (2 forms)
//
// * VEXTRACTI128 imm8, ymm, xmm [AVX2]
// * VEXTRACTI128 imm8, ymm, m128 [AVX2]
//
func (self *Program) VEXTRACTI128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTI128", 3, Operands { v0, v1, v2 })
    // VEXTRACTI128 imm8, ymm, xmm
    if isImm8(v0) && isYMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x7d)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI128 imm8, ymm, m128
    if isImm8(v0) && isYMM(v1) && isM128(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x39)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTI128")
    }
    return p
}

// VEXTRACTI32X4 performs "Extract 128 Bits of Packed Doubleword Integer Values".
//
// Mnemonic : VEXTRACTI32X4
// Supported forms : (4 forms)
//
// * VEXTRACTI32X4 imm8, zmm, xmm{k}{z} [AVX512F]
// * VEXTRACTI32X4 imm8, zmm, m128{k}{z} [AVX512F]
// * VEXTRACTI32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VEXTRACTI32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VEXTRACTI32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTI32X4", 3, Operands { v0, v1, v2 })
    // VEXTRACTI32X4 imm8, zmm, xmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI32X4 imm8, zmm, m128{k}{z}
    if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x39)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI32X4 imm8, ymm, xmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI32X4 imm8, ymm, m128{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x39)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTI32X4")
    }
    return p
}

// VEXTRACTI32X8 performs "Extract 256 Bits of Packed Doubleword Integer Values".
//
// Mnemonic : VEXTRACTI32X8
// Supported forms : (2 forms)
//
// * VEXTRACTI32X8 imm8, zmm, ymm{k}{z} [AVX512DQ]
// * VEXTRACTI32X8 imm8, zmm, m256{k}{z} [AVX512DQ]
//
func (self *Program) VEXTRACTI32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTI32X8", 3, Operands { v0, v1, v2 })
    // VEXTRACTI32X8 imm8, zmm, ymm{k}{z}
    if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI32X8 imm8, zmm, m256{k}{z}
    if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3b)
            m.mrsd(lcode(v[1]), addr(v[2]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTI32X8")
    }
    return p
}

// VEXTRACTI64X2 performs "Extract 128 Bits of Packed Quadword Integer Values".
//
// Mnemonic : VEXTRACTI64X2
// Supported forms : (4 forms)
//
// * VEXTRACTI64X2 imm8, zmm, xmm{k}{z} [AVX512DQ]
// * VEXTRACTI64X2 imm8, zmm, m128{k}{z} [AVX512DQ]
// * VEXTRACTI64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VEXTRACTI64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VEXTRACTI64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTI64X2", 3, Operands { v0, v1, v2 })
    // VEXTRACTI64X2 imm8, zmm, xmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI64X2 imm8, zmm, m128{k}{z}
    if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x39)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI64X2 imm8, ymm, xmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI64X2 imm8, ymm, m128{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x39)
            m.mrsd(lcode(v[1]), addr(v[2]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTI64X2")
    }
    return p
}

// VEXTRACTI64X4 performs "Extract 256 Bits of Packed Quadword Integer Values".
//
// Mnemonic : VEXTRACTI64X4
// Supported forms : (2 forms)
//
// * VEXTRACTI64X4 imm8, zmm, ymm{k}{z} [AVX512F]
// * VEXTRACTI64X4 imm8, zmm, m256{k}{z} [AVX512F]
//
func (self *Program) VEXTRACTI64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTI64X4", 3, Operands { v0, v1, v2 })
    // VEXTRACTI64X4 imm8, zmm, ymm{k}{z}
    if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTI64X4 imm8, zmm, m256{k}{z}
    if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3b)
            m.mrsd(lcode(v[1]), addr(v[2]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTI64X4")
    }
    return p
}

// VEXTRACTPS performs "Extract Packed Single Precision Floating-Point Value".
//
// Mnemonic : VEXTRACTPS
// Supported forms : (4 forms)
//
// * VEXTRACTPS imm8, xmm, r32 [AVX]
// * VEXTRACTPS imm8, xmm, m32 [AVX]
// * VEXTRACTPS imm8, xmm, r32 [AVX512F]
// * VEXTRACTPS imm8, xmm, m32 [AVX512F]
//
func (self *Program) VEXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VEXTRACTPS", 3, Operands { v0, v1, v2 })
    // VEXTRACTPS imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79)
            m.emit(0x17)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTPS imm8, xmm, m32
    if isImm8(v0) && isXMM(v1) && isM32(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x17)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTPS imm8, xmm, r32
    if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0x17)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VEXTRACTPS imm8, xmm, m32
    if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
            m.emit(0x17)
            m.mrsd(lcode(v[1]), addr(v[2]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VEXTRACTPS")
    }
    return p
}

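// Illustrative only (not generated): VEXTRACTPS copies the 32-bit element
// selected by imm8[1:0] out of the xmm source as raw bits, so the r32 form
// yields the IEEE-754 bit pattern rather than a converted integer. Assuming
// this package's exported register values:
//
//     p.VEXTRACTPS(2, XMM1, EAX)    // eax := bit pattern of xmm1[2]
//
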
// VFIXUPIMMPD performs "Fix Up Special Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFIXUPIMMPD
// Supported forms : (7 forms)
//
// * VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFIXUPIMMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFIXUPIMMPD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VFIXUPIMMPD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VFIXUPIMMPD takes 4 or 5 operands")
    }
    // VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFIXUPIMMPD")
    }
    return p
}

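// Illustrative only (not generated): in the VFIXUPIMM wrappers the imm8 comes
// first, an optional {sae} token second, and the remaining registers follow
// this file's reversed Intel operand order, so the operand right after the
// imm8 is Intel's src2, which carries the per-element fixup lookup table. A
// sketch with assumed register names:
//
//     p.VFIXUPIMMPD(0, ZMM1, ZMM2, ZMM3)    // 4 operands, no {sae}: fix zmm2 into zmm3 using table zmm1
//
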
// VFIXUPIMMPS performs "Fix Up Special Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFIXUPIMMPS
// Supported forms : (7 forms)
//
// * VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512VL]
// * VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z} [AVX512VL]
// * VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFIXUPIMMPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFIXUPIMMPS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VFIXUPIMMPS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VFIXUPIMMPS takes 4 or 5 operands")
    }
    // VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x54)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x54)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFIXUPIMMPS")
    }
    return p
}

// VFIXUPIMMSD performs "Fix Up Special Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VFIXUPIMMSD
// Supported forms : (3 forms)
//
// * VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z} [AVX512F]
// * VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFIXUPIMMSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFIXUPIMMSD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VFIXUPIMMSD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VFIXUPIMMSD takes 4 or 5 operands")
    }
    // VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x55)
            m.mrsd(lcode(v[3]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFIXUPIMMSD")
    }
    return p
}

// VFIXUPIMMSS performs "Fix Up Special Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VFIXUPIMMSS
// Supported forms : (3 forms)
//
// * VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z} [AVX512F]
// * VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFIXUPIMMSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFIXUPIMMSS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VFIXUPIMMSS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VFIXUPIMMSS takes 4 or 5 operands")
    }
    // VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x55)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFIXUPIMMSS")
    }
    return p
}

// VFMADD132PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADD132PD
// Supported forms : (11 forms)
//
// * VFMADD132PD xmm, xmm, xmm [FMA3]
// * VFMADD132PD m128, xmm, xmm [FMA3]
// * VFMADD132PD ymm, ymm, ymm [FMA3]
// * VFMADD132PD m256, ymm, ymm [FMA3]
// * VFMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD132PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD132PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD132PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD132PD takes 3 or 4 operands")
    }
    // VFMADD132PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD132PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD132PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMADD132PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD132PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMADD132PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMADD132PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD132PD")
    }
    return p
}

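// Illustrative only (not generated): the "132" suffix fixes which operands are
// multiplied and which is added. With this file's reversed operand order the
// last register is the destination, giving dst := dst*src3 + src2, i.e. for
//
//     p.VFMADD132PD(XMM1, XMM2, XMM3)    // xmm3 := xmm3*xmm1 + xmm2
//
// (register names assumed to be this package's exported values).
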
// VFMADD132PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADD132PS
// Supported forms : (11 forms)
//
// * VFMADD132PS xmm, xmm, xmm [FMA3]
// * VFMADD132PS m128, xmm, xmm [FMA3]
// * VFMADD132PS ymm, ymm, ymm [FMA3]
// * VFMADD132PS m256, ymm, ymm [FMA3]
// * VFMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD132PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD132PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD132PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD132PS takes 3 or 4 operands")
    }
    // VFMADD132PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD132PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD132PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMADD132PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD132PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMADD132PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0x98)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADD132PS m256/m32bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x98)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADD132PS ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0x98)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADD132PS")
|
|
}
|
|
return p
|
|
}
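
// Note on the memory forms above: the final argument of m.mrsd is the
// displacement scale. The VEX forms pass 1 (plain disp8/disp32), while the
// EVEX forms pass the operand width in bytes (16, 32 or 64) so that an 8-bit
// displacement is encoded per the AVX-512 compressed disp8*N rule; e.g. a
// byte offset of 128 on a zmm operand is stored as the single byte 128/64 = 2.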

// VFMADD132SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADD132SD
// Supported forms : (5 forms)
//
// * VFMADD132SD xmm, xmm, xmm [FMA3]
// * VFMADD132SD m64, xmm, xmm [FMA3]
// * VFMADD132SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMADD132SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD132SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD132SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD132SD takes 3 or 4 operands")
    }
    // VFMADD132SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x99)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD132SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x99)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFMADD132SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD132SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD132SD")
    }
    return p
}
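
// A sketch of the embedded-rounding form: the {er} selector comes first and
// the (maskable) destination last, making the call a 4-operand variant. The
// rounding-constant name below is an assumption for illustration; use
// whichever isER operand this package defines:
//
//	p.VFMADD132SD(RN_SAE, XMM3, XMM2, XMM1)   // {rn-sae}, round to nearest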

// VFMADD132SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADD132SS
// Supported forms : (5 forms)
//
// * VFMADD132SS xmm, xmm, xmm [FMA3]
// * VFMADD132SS m32, xmm, xmm [FMA3]
// * VFMADD132SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMADD132SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD132SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD132SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD132SS takes 3 or 4 operands")
    }
    // VFMADD132SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD132SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x99)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD132SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x99)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFMADD132SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD132SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD132SS")
    }
    return p
}

// VFMADD213PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADD213PD
// Supported forms : (11 forms)
//
// * VFMADD213PD xmm, xmm, xmm [FMA3]
// * VFMADD213PD m128, xmm, xmm [FMA3]
// * VFMADD213PD ymm, ymm, ymm [FMA3]
// * VFMADD213PD m256, ymm, ymm [FMA3]
// * VFMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD213PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD213PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD213PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD213PD takes 3 or 4 operands")
    }
    // VFMADD213PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD213PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD213PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMADD213PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD213PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMADD213PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMADD213PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD213PD")
    }
    return p
}
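
// The 132/213/231 suffixes encode which operands are multiplied and which is
// added, in Intel (destination-first) operand order op1, op2, op3:
//
//	VFMADD132: op1 = op1*op3 + op2
//	VFMADD213: op1 = op2*op1 + op3
//	VFMADD231: op1 = op2*op3 + op1
//
// This package takes operands in AT&T order, so op1 is the last argument of
// each method and op3 is the first.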

// VFMADD213PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADD213PS
// Supported forms : (11 forms)
//
// * VFMADD213PS xmm, xmm, xmm [FMA3]
// * VFMADD213PS m128, xmm, xmm [FMA3]
// * VFMADD213PS ymm, ymm, ymm [FMA3]
// * VFMADD213PS m256, ymm, ymm [FMA3]
// * VFMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD213PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD213PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD213PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD213PS takes 3 or 4 operands")
    }
    // VFMADD213PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD213PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD213PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMADD213PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD213PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMADD213PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMADD213PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xa8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD213PS")
    }
    return p
}

// VFMADD213SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADD213SD
// Supported forms : (5 forms)
//
// * VFMADD213SD xmm, xmm, xmm [FMA3]
// * VFMADD213SD m64, xmm, xmm [FMA3]
// * VFMADD213SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMADD213SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD213SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD213SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD213SD takes 3 or 4 operands")
    }
    // VFMADD213SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xa9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD213SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xa9)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFMADD213SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xa9)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD213SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xa9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD213SD")
    }
    return p
}

// VFMADD213SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADD213SS
// Supported forms : (5 forms)
//
// * VFMADD213SS xmm, xmm, xmm [FMA3]
// * VFMADD213SS m32, xmm, xmm [FMA3]
// * VFMADD213SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMADD213SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD213SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD213SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD213SS takes 3 or 4 operands")
    }
    // VFMADD213SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xa9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD213SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD213SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xa9)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFMADD213SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xa9)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD213SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xa9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD213SS")
    }
    return p
}

// VFMADD231PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADD231PD
// Supported forms : (11 forms)
//
// * VFMADD231PD xmm, xmm, xmm [FMA3]
// * VFMADD231PD m128, xmm, xmm [FMA3]
// * VFMADD231PD ymm, ymm, ymm [FMA3]
// * VFMADD231PD m256, ymm, ymm [FMA3]
// * VFMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD231PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD231PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD231PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD231PD takes 3 or 4 operands")
    }
    // VFMADD231PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD231PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD231PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMADD231PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD231PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMADD231PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMADD231PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD231PD")
    }
    return p
}

// VFMADD231PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADD231PS
// Supported forms : (11 forms)
//
// * VFMADD231PS xmm, xmm, xmm [FMA3]
// * VFMADD231PS m128, xmm, xmm [FMA3]
// * VFMADD231PS ymm, ymm, ymm [FMA3]
// * VFMADD231PS m256, ymm, ymm [FMA3]
// * VFMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD231PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD231PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD231PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD231PS takes 3 or 4 operands")
    }
    // VFMADD231PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD231PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD231PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMADD231PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD231PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMADD231PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMADD231PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xb8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD231PS")
    }
    return p
}

// VFMADD231SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADD231SD
// Supported forms : (5 forms)
//
// * VFMADD231SD xmm, xmm, xmm [FMA3]
// * VFMADD231SD m64, xmm, xmm [FMA3]
// * VFMADD231SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMADD231SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD231SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD231SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD231SD takes 3 or 4 operands")
    }
    // VFMADD231SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xb9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD231SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xb9)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFMADD231SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xb9)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD231SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD231SD")
    }
    return p
}

// VFMADD231SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADD231SS
// Supported forms : (5 forms)
//
// * VFMADD231SS xmm, xmm, xmm [FMA3]
// * VFMADD231SS m32, xmm, xmm [FMA3]
// * VFMADD231SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMADD231SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMADD231SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMADD231SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMADD231SS takes 3 or 4 operands")
    }
    // VFMADD231SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xb9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMADD231SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMADD231SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xb9)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFMADD231SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xb9)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMADD231SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADD231SS")
    }
    return p
}

// VFMADDPD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADDPD
// Supported forms : (6 forms)
//
// * VFMADDPD xmm, xmm, xmm, xmm [FMA4]
// * VFMADDPD m128, xmm, xmm, xmm [FMA4]
// * VFMADDPD xmm, m128, xmm, xmm [FMA4]
// * VFMADDPD ymm, ymm, ymm, ymm [FMA4]
// * VFMADDPD m256, ymm, ymm, ymm [FMA4]
// * VFMADDPD ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMADDPD", 4, Operands { v0, v1, v2, v3 })
    // VFMADDPD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDPD m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x69)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMADDPD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x69)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDPD ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDPD m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x69)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMADDPD ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x69)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADDPD")
    }
    return p
}
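
// For the all-register FMA4 forms, two alternative encodings are registered
// via consecutive p.add calls: FMA4 lets VEX.W choose which multiplicand sits
// in the ModRM r/m field, with the other source carried in the high nibble of
// the trailing immediate byte (hlcode(...) << 4). Both byte sequences are
// architecturally equivalent, so the encoder may pick either.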

// VFMADDPS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMADDPS
// Supported forms : (6 forms)
//
// * VFMADDPS xmm, xmm, xmm, xmm [FMA4]
// * VFMADDPS m128, xmm, xmm, xmm [FMA4]
// * VFMADDPS xmm, m128, xmm, xmm [FMA4]
// * VFMADDPS ymm, ymm, ymm, ymm [FMA4]
// * VFMADDPS m256, ymm, ymm, ymm [FMA4]
// * VFMADDPS ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMADDPS", 4, Operands { v0, v1, v2, v3 })
    // VFMADDPS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDPS m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x68)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMADDPS xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x68)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDPS ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDPS m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x68)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMADDPS ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x68)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADDPS")
    }
    return p
}

// VFMADDSD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMADDSD
// Supported forms : (3 forms)
//
// * VFMADDSD xmm, xmm, xmm, xmm [FMA4]
// * VFMADDSD m64, xmm, xmm, xmm [FMA4]
// * VFMADDSD xmm, m64, xmm, xmm [FMA4]
//
func (self *Program) VFMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMADDSD", 4, Operands { v0, v1, v2, v3 })
    // VFMADDSD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMADDSD m64, xmm, xmm, xmm
    if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6b)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMADDSD xmm, m64, xmm, xmm
    if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6b)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMADDSD")
    }
    return p
}
|
|
|
|
// VFMADDSS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSS
|
|
// Supported forms : (3 forms)
|
|
//
|
|
// * VFMADDSS xmm, xmm, xmm, xmm [FMA4]
|
|
// * VFMADDSS m32, xmm, xmm, xmm [FMA4]
|
|
// * VFMADDSS xmm, m32, xmm, xmm [FMA4]
|
|
//
|
|
func (self *Program) VFMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
|
|
p := self.alloc("VFMADDSS", 4, Operands { v0, v1, v2, v3 })
|
|
// VFMADDSS xmm, xmm, xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x6a)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x6a)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSS m32, xmm, xmm, xmm
|
|
if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
|
|
m.emit(0x6a)
|
|
m.mrsd(lcode(v[3]), addr(v[0]), 1)
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSS xmm, m32, xmm, xmm
|
|
if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x6a)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUB132PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUB132PD
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMADDSUB132PD xmm, xmm, xmm [FMA3]
|
|
// * VFMADDSUB132PD m128, xmm, xmm [FMA3]
|
|
// * VFMADDSUB132PD ymm, ymm, ymm [FMA3]
|
|
// * VFMADDSUB132PD m256, ymm, ymm [FMA3]
|
|
// * VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB132PD zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMADDSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMADDSUB132PD", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMADDSUB132PD", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMADDSUB132PD takes 3 or 4 operands")
|
|
}
|
|
// VFMADDSUB132PD xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PD m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB132PD ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PD m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PD zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMADDSUB132PD xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADDSUB132PD ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUB132PD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUB132PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUB132PS
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMADDSUB132PS xmm, xmm, xmm [FMA3]
|
|
// * VFMADDSUB132PS m128, xmm, xmm [FMA3]
|
|
// * VFMADDSUB132PS ymm, ymm, ymm [FMA3]
|
|
// * VFMADDSUB132PS m256, ymm, ymm [FMA3]
|
|
// * VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB132PS zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMADDSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMADDSUB132PS", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMADDSUB132PS", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMADDSUB132PS takes 3 or 4 operands")
|
|
}
|
|
// VFMADDSUB132PS xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PS m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB132PS ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PS m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PS zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMADDSUB132PS xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x96)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADDSUB132PS ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0x96)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUB132PS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUB213PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUB213PD
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMADDSUB213PD xmm, xmm, xmm [FMA3]
|
|
// * VFMADDSUB213PD m128, xmm, xmm [FMA3]
|
|
// * VFMADDSUB213PD ymm, ymm, ymm [FMA3]
|
|
// * VFMADDSUB213PD m256, ymm, ymm [FMA3]
|
|
// * VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB213PD zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMADDSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMADDSUB213PD", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMADDSUB213PD", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMADDSUB213PD takes 3 or 4 operands")
|
|
}
|
|
// VFMADDSUB213PD xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PD m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB213PD ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PD m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PD zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMADDSUB213PD xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADDSUB213PD ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUB213PD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUB213PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUB213PS
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMADDSUB213PS xmm, xmm, xmm [FMA3]
|
|
// * VFMADDSUB213PS m128, xmm, xmm [FMA3]
|
|
// * VFMADDSUB213PS ymm, ymm, ymm [FMA3]
|
|
// * VFMADDSUB213PS m256, ymm, ymm [FMA3]
|
|
// * VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB213PS zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMADDSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMADDSUB213PS", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMADDSUB213PS", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMADDSUB213PS takes 3 or 4 operands")
|
|
}
|
|
// VFMADDSUB213PS xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PS m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB213PS ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PS m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PS zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMADDSUB213PS xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xa6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADDSUB213PS ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0xa6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUB213PS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUB231PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUB231PD
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMADDSUB231PD xmm, xmm, xmm [FMA3]
|
|
// * VFMADDSUB231PD m128, xmm, xmm [FMA3]
|
|
// * VFMADDSUB231PD ymm, ymm, ymm [FMA3]
|
|
// * VFMADDSUB231PD m256, ymm, ymm [FMA3]
|
|
// * VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB231PD zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMADDSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMADDSUB231PD", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMADDSUB231PD", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMADDSUB231PD takes 3 or 4 operands")
|
|
}
|
|
// VFMADDSUB231PD xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PD m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB231PD ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PD m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PD zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMADDSUB231PD xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADDSUB231PD ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUB231PD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUB231PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUB231PS
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMADDSUB231PS xmm, xmm, xmm [FMA3]
|
|
// * VFMADDSUB231PS m128, xmm, xmm [FMA3]
|
|
// * VFMADDSUB231PS ymm, ymm, ymm [FMA3]
|
|
// * VFMADDSUB231PS m256, ymm, ymm [FMA3]
|
|
// * VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB231PS zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMADDSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMADDSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMADDSUB231PS", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMADDSUB231PS", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMADDSUB231PS takes 3 or 4 operands")
|
|
}
|
|
// VFMADDSUB231PS xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PS m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB231PS ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PS m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PS zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMADDSUB231PS xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0xb6)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMADDSUB231PS ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0xb6)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUB231PS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUBPD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUBPD
|
|
// Supported forms : (6 forms)
|
|
//
|
|
// * VFMADDSUBPD xmm, xmm, xmm, xmm [FMA4]
|
|
// * VFMADDSUBPD m128, xmm, xmm, xmm [FMA4]
|
|
// * VFMADDSUBPD xmm, m128, xmm, xmm [FMA4]
|
|
// * VFMADDSUBPD ymm, ymm, ymm, ymm [FMA4]
|
|
// * VFMADDSUBPD m256, ymm, ymm, ymm [FMA4]
|
|
// * VFMADDSUBPD ymm, m256, ymm, ymm [FMA4]
|
|
//
|
|
func (self *Program) VFMADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
|
|
p := self.alloc("VFMADDSUBPD", 4, Operands { v0, v1, v2, v3 })
|
|
// VFMADDSUBPD xmm, xmm, xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5d)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5d)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPD m128, xmm, xmm, xmm
|
|
if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
|
|
m.emit(0x5d)
|
|
m.mrsd(lcode(v[3]), addr(v[0]), 1)
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPD xmm, m128, xmm, xmm
|
|
if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x5d)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPD ymm, ymm, ymm, ymm
|
|
if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5d)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5d)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPD m256, ymm, ymm, ymm
|
|
if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
|
|
m.emit(0x5d)
|
|
m.mrsd(lcode(v[3]), addr(v[0]), 1)
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPD ymm, m256, ymm, ymm
|
|
if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x5d)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUBPD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMADDSUBPS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMADDSUBPS
|
|
// Supported forms : (6 forms)
|
|
//
|
|
// * VFMADDSUBPS xmm, xmm, xmm, xmm [FMA4]
|
|
// * VFMADDSUBPS m128, xmm, xmm, xmm [FMA4]
|
|
// * VFMADDSUBPS xmm, m128, xmm, xmm [FMA4]
|
|
// * VFMADDSUBPS ymm, ymm, ymm, ymm [FMA4]
|
|
// * VFMADDSUBPS m256, ymm, ymm, ymm [FMA4]
|
|
// * VFMADDSUBPS ymm, m256, ymm, ymm [FMA4]
|
|
//
|
|
func (self *Program) VFMADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
|
|
p := self.alloc("VFMADDSUBPS", 4, Operands { v0, v1, v2, v3 })
|
|
// VFMADDSUBPS xmm, xmm, xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5c)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5c)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPS m128, xmm, xmm, xmm
|
|
if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
|
|
m.emit(0x5c)
|
|
m.mrsd(lcode(v[3]), addr(v[0]), 1)
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPS xmm, m128, xmm, xmm
|
|
if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x5c)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPS ymm, ymm, ymm, ymm
|
|
if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5c)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x5c)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPS m256, ymm, ymm, ymm
|
|
if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
|
|
m.emit(0x5c)
|
|
m.mrsd(lcode(v[3]), addr(v[0]), 1)
|
|
m.emit(hlcode(v[1]) << 4)
|
|
})
|
|
}
|
|
// VFMADDSUBPS ymm, m256, ymm, ymm
|
|
if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
|
|
self.require(ISA_FMA4)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x5c)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.emit(hlcode(v[0]) << 4)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMADDSUBPS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMSUB132PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMSUB132PD
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMSUB132PD xmm, xmm, xmm [FMA3]
|
|
// * VFMSUB132PD m128, xmm, xmm [FMA3]
|
|
// * VFMSUB132PD ymm, ymm, ymm [FMA3]
|
|
// * VFMSUB132PD m256, ymm, ymm [FMA3]
|
|
// * VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMSUB132PD", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMSUB132PD", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMSUB132PD takes 3 or 4 operands")
|
|
}
|
|
// VFMSUB132PD xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x9a)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMSUB132PD m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x9a)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMSUB132PD ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x9a)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMSUB132PD m256, ymm, ymm
|
|
if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x9a)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x9a)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VFMSUB132PD {er}, zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
|
|
m.emit(0x9a)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VFMSUB132PD zmm, zmm, zmm{k}{z}
|
|
if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0x9a)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x9a)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VFMSUB132PD xmm, xmm, xmm{k}{z}
|
|
if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0x9a)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
|
|
m.emit(0x9a)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VFMSUB132PD ymm, ymm, ymm{k}{z}
|
|
if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0x9a)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VFMSUB132PD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VFMSUB132PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMSUB132PS
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMSUB132PS xmm, xmm, xmm [FMA3]
|
|
// * VFMSUB132PS m128, xmm, xmm [FMA3]
|
|
// * VFMSUB132PS ymm, ymm, ymm [FMA3]
|
|
// * VFMSUB132PS m256, ymm, ymm [FMA3]
|
|
// * VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB132PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB132PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB132PS takes 3 or 4 operands")
    }
    // VFMSUB132PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB132PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB132PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB132PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUB132PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB132PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUB132PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUB132PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB132PS")
    }
    return p
}
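
// A usage sketch (not produced by mkasm_amd64.py): operands follow the
// form comments above, i.e. reversed Intel order with the destination
// last, so the first operand is the reg/mem source. The names below
// follow the upstream iasm entry points (DefaultArch, CreateProgram,
// Assemble, XMM0...) and are an assumption here, not part of this file:
//
//     p := DefaultArch.CreateProgram()   // assumed constructor
//     p.VFMSUB132PS(XMM2, XMM1, XMM0)    // XMM0 = XMM0*XMM2 - XMM1, per lane
//     p.RET()
//     code := p.Assemble(0)              // encoded machine-code bytes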

// VFMSUB132SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB132SD
// Supported forms : (5 forms)
//
// * VFMSUB132SD xmm, xmm, xmm [FMA3]
// * VFMSUB132SD m64, xmm, xmm [FMA3]
// * VFMSUB132SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB132SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB132SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB132SD takes 3 or 4 operands")
    }
    // VFMSUB132SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB132SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB132SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x9b)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFMSUB132SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB132SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB132SD")
    }
    return p
}
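
// The 4-operand form of these scalar variants is selected by passing an
// embedded-rounding operand first (matched by isER above); a minimal
// sketch, assuming a round-to-nearest constant such as iasm's RN_SAE
// (the constant name is an assumption):
//
//     p.VFMSUB132SD(RN_SAE, XMM3, XMM2, XMM1)   // XMM1 = XMM1*XMM3 - XMM2, {rn-sae}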

// VFMSUB132SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB132SS
// Supported forms : (5 forms)
//
// * VFMSUB132SS xmm, xmm, xmm [FMA3]
// * VFMSUB132SS m32, xmm, xmm [FMA3]
// * VFMSUB132SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB132SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB132SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB132SS takes 3 or 4 operands")
    }
    // VFMSUB132SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB132SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB132SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x9b)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFMSUB132SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB132SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB132SS")
    }
    return p
}
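
// Note on the 132/213/231 suffixes used throughout: with Intel operands
// (dst, src2, src3) the digits name the multiply/subtract ordering, so
// 132 computes dst = dst*src3 - src2, 213 computes dst = src2*dst - src3,
// and 231 computes dst = src2*src3 - dst. In this package's reversed
// operand order (v0 = src3, v1 = src2, v2 = dst) that is v2 = v2*v0 - v1,
// v2 = v1*v2 - v0 and v2 = v1*v0 - v2 respectively.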

// VFMSUB213PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB213PD
// Supported forms : (11 forms)
//
// * VFMSUB213PD xmm, xmm, xmm [FMA3]
// * VFMSUB213PD m128, xmm, xmm [FMA3]
// * VFMSUB213PD ymm, ymm, ymm [FMA3]
// * VFMSUB213PD m256, ymm, ymm [FMA3]
// * VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB213PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB213PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB213PD takes 3 or 4 operands")
    }
    // VFMSUB213PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB213PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUB213PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB213PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUB213PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUB213PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB213PD")
    }
    return p
}

// VFMSUB213PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB213PS
// Supported forms : (11 forms)
//
// * VFMSUB213PS xmm, xmm, xmm [FMA3]
// * VFMSUB213PS m128, xmm, xmm [FMA3]
// * VFMSUB213PS ymm, ymm, ymm [FMA3]
// * VFMSUB213PS m256, ymm, ymm [FMA3]
// * VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB213PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB213PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB213PS takes 3 or 4 operands")
    }
    // VFMSUB213PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB213PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUB213PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB213PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUB213PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xaa)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUB213PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xaa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB213PS")
    }
    return p
}
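
// Encoding note: the trailing constant handed to m.mrsd is the disp8
// scaling factor. VEX forms pass 1 (a plain byte displacement), while the
// EVEX forms pass the operand size (4/8 for scalars, 16/32/64 for
// m128/m256/m512) to get AVX-512 compressed displacement (disp8*N), e.g.
// a [reg+128] operand in an m512 form can encode as disp8 = 128/64 = 2.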

// VFMSUB213SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB213SD
// Supported forms : (5 forms)
//
// * VFMSUB213SD xmm, xmm, xmm [FMA3]
// * VFMSUB213SD m64, xmm, xmm [FMA3]
// * VFMSUB213SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB213SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB213SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB213SD takes 3 or 4 operands")
    }
    // VFMSUB213SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xab)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB213SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xab)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFMSUB213SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB213SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB213SD")
    }
    return p
}

// VFMSUB213SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB213SS
// Supported forms : (5 forms)
//
// * VFMSUB213SS xmm, xmm, xmm [FMA3]
// * VFMSUB213SS m32, xmm, xmm [FMA3]
// * VFMSUB213SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB213SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB213SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB213SS takes 3 or 4 operands")
    }
    // VFMSUB213SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB213SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xab)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB213SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xab)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFMSUB213SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB213SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xab)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB213SS")
    }
    return p
}

// VFMSUB231PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB231PD
// Supported forms : (11 forms)
//
// * VFMSUB231PD xmm, xmm, xmm [FMA3]
// * VFMSUB231PD m128, xmm, xmm [FMA3]
// * VFMSUB231PD ymm, ymm, ymm [FMA3]
// * VFMSUB231PD m256, ymm, ymm [FMA3]
// * VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB231PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB231PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB231PD takes 3 or 4 operands")
    }
    // VFMSUB231PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB231PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUB231PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB231PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUB231PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUB231PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB231PD")
    }
    return p
}

// VFMSUB231PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB231PS
// Supported forms : (11 forms)
//
// * VFMSUB231PS xmm, xmm, xmm [FMA3]
// * VFMSUB231PS m128, xmm, xmm [FMA3]
// * VFMSUB231PS ymm, ymm, ymm [FMA3]
// * VFMSUB231PS m256, ymm, ymm [FMA3]
// * VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB231PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB231PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB231PS takes 3 or 4 operands")
    }
    // VFMSUB231PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB231PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUB231PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB231PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUB231PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xba)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUB231PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xba)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB231PS")
    }
    return p
}

// VFMSUB231SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB231SD
// Supported forms : (5 forms)
//
// * VFMSUB231SD xmm, xmm, xmm [FMA3]
// * VFMSUB231SD m64, xmm, xmm [FMA3]
// * VFMSUB231SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB231SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB231SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB231SD takes 3 or 4 operands")
    }
    // VFMSUB231SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB231SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xbb)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFMSUB231SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB231SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB231SD")
    }
    return p
}

// VFMSUB231SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUB231SS
// Supported forms : (5 forms)
//
// * VFMSUB231SS xmm, xmm, xmm [FMA3]
// * VFMSUB231SS m32, xmm, xmm [FMA3]
// * VFMSUB231SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUB231SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUB231SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUB231SS takes 3 or 4 operands")
    }
    // VFMSUB231SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUB231SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUB231SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xbb)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFMSUB231SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUB231SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUB231SS")
    }
    return p
}

// VFMSUBADD132PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADD132PD
// Supported forms : (11 forms)
//
// * VFMSUBADD132PD xmm, xmm, xmm [FMA3]
// * VFMSUBADD132PD m128, xmm, xmm [FMA3]
// * VFMSUBADD132PD ymm, ymm, ymm [FMA3]
// * VFMSUBADD132PD m256, ymm, ymm [FMA3]
// * VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD132PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUBADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUBADD132PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUBADD132PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUBADD132PD takes 3 or 4 operands")
    }
    // VFMSUBADD132PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD132PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD132PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD132PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUBADD132PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUBADD132PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUBADD132PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADD132PD")
    }
    return p
}
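
// Semantics note for the SUBADD family: lanes alternate, with even-indexed
// lanes computing a fused multiply-add and odd-indexed lanes a fused
// multiply-subtract; for the 132 form above that is
// v2[i] = v2[i]*v0[i] + v1[i] for even i and v2[i] = v2[i]*v0[i] - v1[i]
// for odd i.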
|
|
|
|
// VFMSUBADD132PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFMSUBADD132PS
|
|
// Supported forms : (11 forms)
|
|
//
|
|
// * VFMSUBADD132PS xmm, xmm, xmm [FMA3]
|
|
// * VFMSUBADD132PS m128, xmm, xmm [FMA3]
|
|
// * VFMSUBADD132PS ymm, ymm, ymm [FMA3]
|
|
// * VFMSUBADD132PS m256, ymm, ymm [FMA3]
|
|
// * VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUBADD132PS zmm, zmm, zmm{k}{z} [AVX512F]
|
|
// * VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUBADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VFMSUBADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VFMSUBADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
|
|
var p *Instruction
|
|
switch len(vv) {
|
|
case 0 : p = self.alloc("VFMSUBADD132PS", 3, Operands { v0, v1, v2 })
|
|
case 1 : p = self.alloc("VFMSUBADD132PS", 4, Operands { v0, v1, v2, vv[0] })
|
|
default : panic("instruction VFMSUBADD132PS takes 3 or 4 operands")
|
|
}
|
|
// VFMSUBADD132PS xmm, xmm, xmm
|
|
if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x97)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VFMSUBADD132PS m128, xmm, xmm
|
|
if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x97)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VFMSUBADD132PS ymm, ymm, ymm
|
|
if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_FMA3)
|
|
p.domain = DomainFMA
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit(0x97)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
    // VFMSUBADD132PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUBADD132PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUBADD132PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUBADD132PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADD132PS")
    }
    return p
}
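
// Editorial usage sketch, not emitted by mkasm_amd64.py: form selection is
// driven purely by operand types, so a plain three-register AVX-512 call
// needs no extra flags. Assumes a *Program p and this package's ZMM
// register constants.
//
//     p.VFMSUBADD132PS(ZMM1, ZMM2, ZMM3)   // zmm, zmm, zmm{k}{z} form, ISA_AVX512F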

// VFMSUBADD213PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADD213PD
// Supported forms : (11 forms)
//
// * VFMSUBADD213PD xmm, xmm, xmm [FMA3]
// * VFMSUBADD213PD m128, xmm, xmm [FMA3]
// * VFMSUBADD213PD ymm, ymm, ymm [FMA3]
// * VFMSUBADD213PD m256, ymm, ymm [FMA3]
// * VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD213PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUBADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUBADD213PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUBADD213PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUBADD213PD takes 3 or 4 operands")
    }
    // VFMSUBADD213PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD213PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUBADD213PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUBADD213PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUBADD213PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADD213PD")
    }
    return p
}
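
// Editorial usage sketch (assumes a *Program p and the XMM register
// constants defined elsewhere in this package): three xmm arguments select
// the FMA3 VEX form and record ISA_FMA3 via self.require.
//
//     p.VFMSUBADD213PD(XMM0, XMM1, XMM2)   // xmm, xmm, xmm [FMA3]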

// VFMSUBADD213PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADD213PS
// Supported forms : (11 forms)
//
// * VFMSUBADD213PS xmm, xmm, xmm [FMA3]
// * VFMSUBADD213PS m128, xmm, xmm [FMA3]
// * VFMSUBADD213PS ymm, ymm, ymm [FMA3]
// * VFMSUBADD213PS m256, ymm, ymm [FMA3]
// * VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD213PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUBADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUBADD213PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUBADD213PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUBADD213PS takes 3 or 4 operands")
    }
    // VFMSUBADD213PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD213PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUBADD213PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUBADD213PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xa7)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUBADD213PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xa7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADD213PS")
    }
    return p
}
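
// Editorial sketch of the memory form: m128 below stands for a 128-bit
// memory operand built with this package's memory-operand helpers
// (construction not shown in this generated file).
//
//     p.VFMSUBADD213PS(m128, XMM4, XMM5)   // m128, xmm, xmm [FMA3]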

// VFMSUBADD231PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADD231PD
// Supported forms : (11 forms)
//
// * VFMSUBADD231PD xmm, xmm, xmm [FMA3]
// * VFMSUBADD231PD m128, xmm, xmm [FMA3]
// * VFMSUBADD231PD ymm, ymm, ymm [FMA3]
// * VFMSUBADD231PD m256, ymm, ymm [FMA3]
// * VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD231PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUBADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUBADD231PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUBADD231PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUBADD231PD takes 3 or 4 operands")
    }
    // VFMSUBADD231PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD231PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUBADD231PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUBADD231PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUBADD231PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADD231PD")
    }
    return p
}
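
// Editorial sketch of the rounding-control form: per the operand test
// above, the {er} operand comes first and the masked destination last.
// The er value must satisfy isER; its constructor lives outside this file.
//
//     p.VFMSUBADD231PD(er, ZMM1, ZMM2, ZMM3)   // {er}, zmm, zmm, zmm{k}{z}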

// VFMSUBADD231PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADD231PS
// Supported forms : (11 forms)
//
// * VFMSUBADD231PS xmm, xmm, xmm [FMA3]
// * VFMSUBADD231PS m128, xmm, xmm [FMA3]
// * VFMSUBADD231PS ymm, ymm, ymm [FMA3]
// * VFMSUBADD231PS m256, ymm, ymm [FMA3]
// * VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD231PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFMSUBADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFMSUBADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFMSUBADD231PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFMSUBADD231PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFMSUBADD231PS takes 3 or 4 operands")
    }
    // VFMSUBADD231PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD231PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFMSUBADD231PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFMSUBADD231PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb7)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFMSUBADD231PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xb7)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADD231PS")
    }
    return p
}
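
// Editorial note: every matched form calls self.require, so encoding the
// zmm variant marks the program as depending on AVX-512F; how that
// requirement is enforced is defined elsewhere in the package.
//
//     p.VFMSUBADD231PS(ZMM0, ZMM1, ZMM2)   // records ISA_AVX512F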

// VFMSUBADDPD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADDPD
// Supported forms : (6 forms)
//
// * VFMSUBADDPD xmm, xmm, xmm, xmm [FMA4]
// * VFMSUBADDPD m128, xmm, xmm, xmm [FMA4]
// * VFMSUBADDPD xmm, m128, xmm, xmm [FMA4]
// * VFMSUBADDPD ymm, ymm, ymm, ymm [FMA4]
// * VFMSUBADDPD m256, ymm, ymm, ymm [FMA4]
// * VFMSUBADDPD ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFMSUBADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMSUBADDPD", 4, Operands { v0, v1, v2, v3 })
    // VFMSUBADDPD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBADDPD m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x5f)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBADDPD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x5f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBADDPD ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBADDPD m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x5f)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBADDPD ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x5f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADDPD")
    }
    return p
}
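
// Editorial sketch of the four-operand FMA4 call: the destination register
// is the last argument (it lands in the ModRM reg field, lcode(v[3])
// above), with the three sources first.
//
//     p.VFMSUBADDPD(XMM0, XMM1, XMM2, XMM3)   // xmm, xmm, xmm, xmm [FMA4]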

// VFMSUBADDPS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBADDPS
// Supported forms : (6 forms)
//
// * VFMSUBADDPS xmm, xmm, xmm, xmm [FMA4]
// * VFMSUBADDPS m128, xmm, xmm, xmm [FMA4]
// * VFMSUBADDPS xmm, m128, xmm, xmm [FMA4]
// * VFMSUBADDPS ymm, ymm, ymm, ymm [FMA4]
// * VFMSUBADDPS m256, ymm, ymm, ymm [FMA4]
// * VFMSUBADDPS ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFMSUBADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMSUBADDPS", 4, Operands { v0, v1, v2, v3 })
    // VFMSUBADDPS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBADDPS m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x5e)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBADDPS xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x5e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBADDPS ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x5e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBADDPS m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x5e)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBADDPS ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x5e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBADDPS")
    }
    return p
}
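
// Editorial sketch: FMA4 also accepts the memory operand in the second
// slot; m128 is again a placeholder for a memory operand built outside
// this file.
//
//     p.VFMSUBADDPS(XMM0, m128, XMM2, XMM3)   // xmm, m128, xmm, xmm [FMA4]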

// VFMSUBPD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBPD
// Supported forms : (6 forms)
//
// * VFMSUBPD xmm, xmm, xmm, xmm [FMA4]
// * VFMSUBPD m128, xmm, xmm, xmm [FMA4]
// * VFMSUBPD xmm, m128, xmm, xmm [FMA4]
// * VFMSUBPD ymm, ymm, ymm, ymm [FMA4]
// * VFMSUBPD m256, ymm, ymm, ymm [FMA4]
// * VFMSUBPD ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMSUBPD", 4, Operands { v0, v1, v2, v3 })
    // VFMSUBPD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBPD m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6d)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBPD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6d)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBPD ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBPD m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6d)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBPD ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6d)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBPD")
    }
    return p
}
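
// Editorial usage sketch (assumed *Program p and YMM register constants):
// four ymm registers select the 256-bit FMA4 form.
//
//     p.VFMSUBPD(YMM0, YMM1, YMM2, YMM3)   // ymm, ymm, ymm, ymm [FMA4]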

// VFMSUBPS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBPS
// Supported forms : (6 forms)
//
// * VFMSUBPS xmm, xmm, xmm, xmm [FMA4]
// * VFMSUBPS m128, xmm, xmm, xmm [FMA4]
// * VFMSUBPS xmm, m128, xmm, xmm [FMA4]
// * VFMSUBPS ymm, ymm, ymm, ymm [FMA4]
// * VFMSUBPS m256, ymm, ymm, ymm [FMA4]
// * VFMSUBPS ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMSUBPS", 4, Operands { v0, v1, v2, v3 })
    // VFMSUBPS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBPS m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6c)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBPS xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBPS ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBPS m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6c)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBPS ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBPS")
    }
    return p
}
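
// Editorial note: the all-register FMA4 forms above register two byte
// sequences with p.add, since operands 0 and 1 may swap between the ModRM
// r/m field and the /is4 immediate; either encoding is valid for
//
//     p.VFMSUBPS(XMM4, XMM5, XMM6, XMM7)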

// VFMSUBSD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBSD
// Supported forms : (3 forms)
//
// * VFMSUBSD xmm, xmm, xmm, xmm [FMA4]
// * VFMSUBSD m64, xmm, xmm, xmm [FMA4]
// * VFMSUBSD xmm, m64, xmm, xmm [FMA4]
//
func (self *Program) VFMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMSUBSD", 4, Operands { v0, v1, v2, v3 })
    // VFMSUBSD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBSD m64, xmm, xmm, xmm
    if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6f)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBSD xmm, m64, xmm, xmm
    if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBSD")
    }
    return p
}
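
// Editorial sketch: the scalar double form takes its 64-bit memory source
// in either of the first two slots; m64 is a placeholder for a memory
// operand constructed elsewhere.
//
//     p.VFMSUBSD(m64, XMM1, XMM2, XMM3)   // m64, xmm, xmm, xmm [FMA4]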

// VFMSUBSS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFMSUBSS
// Supported forms : (3 forms)
//
// * VFMSUBSS xmm, xmm, xmm, xmm [FMA4]
// * VFMSUBSS m32, xmm, xmm, xmm [FMA4]
// * VFMSUBSS xmm, m32, xmm, xmm [FMA4]
//
func (self *Program) VFMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFMSUBSS", 4, Operands { v0, v1, v2, v3 })
    // VFMSUBSS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFMSUBSS m32, xmm, xmm, xmm
    if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x6e)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFMSUBSS xmm, m32, xmm, xmm
    if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x6e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFMSUBSS")
    }
    return p
}
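
// Editorial sketch: the single-precision scalar variant differs only in
// its 0x6e opcode and m32 memory width.
//
//     p.VFMSUBSS(XMM0, XMM1, XMM2, XMM3)   // xmm, xmm, xmm, xmm [FMA4]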

// VFNMADD132PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD132PD
// Supported forms : (11 forms)
//
// * VFNMADD132PD xmm, xmm, xmm [FMA3]
// * VFNMADD132PD m128, xmm, xmm [FMA3]
// * VFNMADD132PD ymm, ymm, ymm [FMA3]
// * VFNMADD132PD m256, ymm, ymm [FMA3]
// * VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD132PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD132PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD132PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD132PD takes 3 or 4 operands")
    }
    // VFNMADD132PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD132PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMADD132PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD132PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMADD132PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMADD132PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD132PD")
    }
    return p
}
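
// Editorial note: in the EVEX memory forms above, the final mrsd argument
// (16/32/64) is the disp8 compression scale matching the full vector
// width, and a broadcast m64bcst operand passes the same isM...bcst check,
// signalled through bcode(v[0]).
//
//     p.VFNMADD132PD(m512, ZMM1, ZMM2)   // m512/m64bcst, zmm, zmm{k}{z}; m512 built elsewhere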

// VFNMADD132PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD132PS
// Supported forms : (11 forms)
//
// * VFNMADD132PS xmm, xmm, xmm [FMA3]
// * VFNMADD132PS m128, xmm, xmm [FMA3]
// * VFNMADD132PS ymm, ymm, ymm [FMA3]
// * VFNMADD132PS m256, ymm, ymm [FMA3]
// * VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD132PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD132PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD132PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD132PS takes 3 or 4 operands")
    }
    // VFNMADD132PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD132PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMADD132PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD132PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMADD132PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMADD132PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x9c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD132PS")
    }
    return p
}
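
// Editorial sketch of the shared failure mode: when no operand pattern
// matches, p.len stays 0 and the constructor panics instead of returning
// an error (RAX here is deliberately invalid for this instruction).
//
//     p.VFNMADD132PS(XMM0, XMM1, RAX)   // panics: invalid operands for VFNMADD132PS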

// VFNMADD132SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD132SD
// Supported forms : (5 forms)
//
// * VFNMADD132SD xmm, xmm, xmm [FMA3]
// * VFNMADD132SD m64, xmm, xmm [FMA3]
// * VFNMADD132SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD132SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD132SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD132SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD132SD takes 3 or 4 operands")
    }
    // VFNMADD132SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD132SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x9d)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFNMADD132SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD132SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD132SD")
    }
    return p
}
|
|
|
|
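// Usage note (not generated): the m64 forms above read the scalar
// double-precision multiplicand straight from memory. A sketch of that
// call shape, assuming this package exposes a Ptr(base, disp) memory
// operand helper and general-purpose register constants (both are
// assumptions here):
//
//     p.VFNMADD132SD(Ptr(RAX, 0), XMM1, XMM0)   // XMM0.lo = -(XMM0.lo * [RAX]) + XMM1.lo
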
// VFNMADD132SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD132SS
// Supported forms : (5 forms)
//
// * VFNMADD132SS xmm, xmm, xmm [FMA3]
// * VFNMADD132SS m32, xmm, xmm [FMA3]
// * VFNMADD132SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD132SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD132SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD132SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD132SS takes 3 or 4 operands")
    }
    // VFNMADD132SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD132SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD132SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x9d)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFNMADD132SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD132SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD132SS")
    }
    return p
}

// VFNMADD213PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD213PD
// Supported forms : (11 forms)
//
// * VFNMADD213PD xmm, xmm, xmm [FMA3]
// * VFNMADD213PD m128, xmm, xmm [FMA3]
// * VFNMADD213PD ymm, ymm, ymm [FMA3]
// * VFNMADD213PD m256, ymm, ymm [FMA3]
// * VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD213PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD213PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD213PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD213PD takes 3 or 4 operands")
    }
    // VFNMADD213PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD213PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMADD213PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD213PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMADD213PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMADD213PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD213PD")
    }
    return p
}

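// Semantics note (not generated): the 132/213/231 suffixes select which
// operands feed the multiply and which one is added. With Intel operand
// names op1 (destination), op2, op3, the FNMADD family computes:
//
//     VFNMADD132PD: op1 = -(op1 * op3) + op2
//     VFNMADD213PD: op1 = -(op2 * op1) + op3
//     VFNMADD231PD: op1 = -(op2 * op3) + op1
//
// Only the 231 form accumulates into the destination, which is why it is
// the usual choice in dot-product style loops.
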
// VFNMADD213PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD213PS
// Supported forms : (11 forms)
//
// * VFNMADD213PS xmm, xmm, xmm [FMA3]
// * VFNMADD213PS m128, xmm, xmm [FMA3]
// * VFNMADD213PS ymm, ymm, ymm [FMA3]
// * VFNMADD213PS m256, ymm, ymm [FMA3]
// * VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD213PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD213PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD213PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD213PS takes 3 or 4 operands")
    }
    // VFNMADD213PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD213PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMADD213PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD213PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMADD213PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xac)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMADD213PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xac)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD213PS")
    }
    return p
}

// VFNMADD213SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD213SD
// Supported forms : (5 forms)
//
// * VFNMADD213SD xmm, xmm, xmm [FMA3]
// * VFNMADD213SD m64, xmm, xmm [FMA3]
// * VFNMADD213SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD213SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD213SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD213SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD213SD takes 3 or 4 operands")
    }
    // VFNMADD213SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xad)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD213SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xad)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFNMADD213SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD213SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD213SD")
    }
    return p
}

// VFNMADD213SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD213SS
// Supported forms : (5 forms)
//
// * VFNMADD213SS xmm, xmm, xmm [FMA3]
// * VFNMADD213SS m32, xmm, xmm [FMA3]
// * VFNMADD213SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD213SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD213SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD213SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD213SS takes 3 or 4 operands")
    }
    // VFNMADD213SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD213SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xad)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD213SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xad)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFNMADD213SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD213SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xad)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD213SS")
    }
    return p
}

// VFNMADD231PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD231PD
// Supported forms : (11 forms)
//
// * VFNMADD231PD xmm, xmm, xmm [FMA3]
// * VFNMADD231PD m128, xmm, xmm [FMA3]
// * VFNMADD231PD ymm, ymm, ymm [FMA3]
// * VFNMADD231PD m256, ymm, ymm [FMA3]
// * VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD231PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD231PD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD231PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD231PD takes 3 or 4 operands")
    }
    // VFNMADD231PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD231PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMADD231PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD231PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMADD231PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMADD231PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD231PD")
    }
    return p
}

// VFNMADD231PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD231PS
// Supported forms : (11 forms)
//
// * VFNMADD231PS xmm, xmm, xmm [FMA3]
// * VFNMADD231PS m128, xmm, xmm [FMA3]
// * VFNMADD231PS ymm, ymm, ymm [FMA3]
// * VFNMADD231PS m256, ymm, ymm [FMA3]
// * VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD231PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD231PS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD231PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD231PS takes 3 or 4 operands")
    }
    // VFNMADD231PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD231PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMADD231PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD231PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMADD231PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbc)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMADD231PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xbc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD231PS")
    }
    return p
}

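// Usage note (not generated): the {er} forms take the embedded-rounding
// selector as an extra leading operand, which is why these methods are
// variadic: three arguments for the ordinary forms, four when a rounding
// mode is prepended. A sketch of the two call shapes, assuming ZMM
// register constants and a rounding-mode value accepted by isER (the
// RN_SAE name is an assumption):
//
//     p.VFNMADD231PS(ZMM1, ZMM2, ZMM0)           // ZMM0 = -(ZMM1 * ZMM2) + ZMM0
//     p.VFNMADD231PS(RN_SAE, ZMM1, ZMM2, ZMM0)   // same, with {rn-sae} rounding
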
// VFNMADD231SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD231SD
// Supported forms : (5 forms)
//
// * VFNMADD231SD xmm, xmm, xmm [FMA3]
// * VFNMADD231SD m64, xmm, xmm [FMA3]
// * VFNMADD231SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD231SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD231SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD231SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD231SD takes 3 or 4 operands")
    }
    // VFNMADD231SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD231SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xbd)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFNMADD231SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD231SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD231SD")
    }
    return p
}

// VFNMADD231SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADD231SS
// Supported forms : (5 forms)
//
// * VFNMADD231SS xmm, xmm, xmm [FMA3]
// * VFNMADD231SS m32, xmm, xmm [FMA3]
// * VFNMADD231SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMADD231SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VFNMADD231SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VFNMADD231SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMADD231SS takes 3 or 4 operands")
    }
    // VFNMADD231SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMADD231SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMADD231SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xbd)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFNMADD231SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMADD231SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADD231SS")
    }
    return p
}

// VFNMADDPD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADDPD
// Supported forms : (6 forms)
//
// * VFNMADDPD xmm, xmm, xmm, xmm [FMA4]
// * VFNMADDPD m128, xmm, xmm, xmm [FMA4]
// * VFNMADDPD xmm, m128, xmm, xmm [FMA4]
// * VFNMADDPD ymm, ymm, ymm, ymm [FMA4]
// * VFNMADDPD m256, ymm, ymm, ymm [FMA4]
// * VFNMADDPD ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFNMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMADDPD", 4, Operands { v0, v1, v2, v3 })
    // VFNMADDPD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDPD m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x79)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMADDPD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x79)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDPD ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDPD m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x79)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMADDPD ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x79)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADDPD")
    }
    return p
}

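// Encoding note (not generated): FMA4 (an AMD extension) is fully
// four-operand and non-destructive, so both multiplicands and the addend
// may differ from the destination. For the all-register forms the
// generator registers two equivalent encodings (either source can travel
// in ModRM.rm or in the trailing imm8 "is4" field) and the assembler
// chooses between them at encoding time. With this package's reversed
// operand order the destination is again the last argument:
//
//     p.VFNMADDPD(XMM3, XMM2, XMM1, XMM0)   // XMM0 = -(XMM1 * XMM2) + XMM3
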
// VFNMADDPS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADDPS
// Supported forms : (6 forms)
//
// * VFNMADDPS xmm, xmm, xmm, xmm [FMA4]
// * VFNMADDPS m128, xmm, xmm, xmm [FMA4]
// * VFNMADDPS xmm, m128, xmm, xmm [FMA4]
// * VFNMADDPS ymm, ymm, ymm, ymm [FMA4]
// * VFNMADDPS m256, ymm, ymm, ymm [FMA4]
// * VFNMADDPS ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFNMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMADDPS", 4, Operands { v0, v1, v2, v3 })
    // VFNMADDPS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDPS m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x78)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMADDPS xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x78)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDPS ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDPS m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x78)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMADDPS ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x78)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADDPS")
    }
    return p
}

// VFNMADDSD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMADDSD
// Supported forms : (3 forms)
//
// * VFNMADDSD xmm, xmm, xmm, xmm [FMA4]
// * VFNMADDSD m64, xmm, xmm, xmm [FMA4]
// * VFNMADDSD xmm, m64, xmm, xmm [FMA4]
//
func (self *Program) VFNMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMADDSD", 4, Operands { v0, v1, v2, v3 })
    // VFNMADDSD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDSD m64, xmm, xmm, xmm
    if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7b)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMADDSD xmm, m64, xmm, xmm
    if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7b)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADDSD")
    }
    return p
}

// VFNMADDSS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMADDSS
// Supported forms : (3 forms)
//
// * VFNMADDSS xmm, xmm, xmm, xmm [FMA4]
// * VFNMADDSS m32, xmm, xmm, xmm [FMA4]
// * VFNMADDSS xmm, m32, xmm, xmm [FMA4]
//
func (self *Program) VFNMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMADDSS", 4, Operands { v0, v1, v2, v3 })
    // VFNMADDSS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMADDSS m32, xmm, xmm, xmm
    if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7a)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMADDSS xmm, m32, xmm, xmm
    if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7a)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMADDSS")
    }
    return p
}

// VFNMSUB132PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VFNMSUB132PD
|
|
// Supported forms : (11 forms)
//
// * VFNMSUB132PD xmm, xmm, xmm [FMA3]
// * VFNMSUB132PD m128, xmm, xmm [FMA3]
// * VFNMSUB132PD ymm, ymm, ymm [FMA3]
// * VFNMSUB132PD m256, ymm, ymm [FMA3]
// * VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB132PD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB132PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB132PD takes 3 or 4 operands")
    }
    // VFNMSUB132PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB132PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB132PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMSUB132PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMSUB132PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB132PD")
    }
    return p
}

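// NOTE: a usage sketch, not part of the generated encoder. Assuming the
// package's register constants (names such as XMM0 and ZMM0 are assumed
// here) and a *Program obtained from this package's usual entry point
// (constructor name hypothetical), the operand types alone select which
// encoding branch above is taken:
//
//     p := CreateProgram()                    // hypothetical constructor
//     p.VFNMSUB132PD(XMM1, XMM2, XMM0)        // VEX-encoded FMA3 form
//     p.VFNMSUB132PD(ZMM1, ZMM2, ZMM0)        // EVEX-encoded AVX-512 form
//
// If no branch matches, the p.len == 0 check at the end panics with
// "invalid operands for VFNMSUB132PD".
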
// VFNMSUB132PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB132PS
// Supported forms : (11 forms)
//
// * VFNMSUB132PS xmm, xmm, xmm [FMA3]
// * VFNMSUB132PS m128, xmm, xmm [FMA3]
// * VFNMSUB132PS ymm, ymm, ymm [FMA3]
// * VFNMSUB132PS m256, ymm, ymm [FMA3]
// * VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB132PS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB132PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB132PS takes 3 or 4 operands")
    }
    // VFNMSUB132PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB132PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB132PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMSUB132PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x9e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMSUB132PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB132PS")
    }
    return p
}

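// NOTE: in the EVEX branches above, the final argument to m.mrsd is the
// disp8*N compressed-displacement scale: 64, 32 and 16 bytes for full
// zmm/ymm/xmm memory operands, versus 1 for the VEX-encoded branches, which
// do not compress displacements. This follows the AVX-512 rule that an 8-bit
// displacement is implicitly multiplied by the size of the memory access.
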
// VFNMSUB132SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB132SD
// Supported forms : (5 forms)
//
// * VFNMSUB132SD xmm, xmm, xmm [FMA3]
// * VFNMSUB132SD m64, xmm, xmm [FMA3]
// * VFNMSUB132SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB132SD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB132SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB132SD takes 3 or 4 operands")
    }
    // VFNMSUB132SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB132SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x9f)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB132SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB132SD")
    }
    return p
}

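// NOTE: for the scalar AVX-512 forms, the embedded-rounding variant takes the
// {er} mode as the first operand, making the call four operands long:
// ({er}, src, src, dst{k}{z}). In the isER branch above, the rounding mode
// lands in bits 5..6 of the fourth EVEX byte via vcode(v[0]) << 5, and the
// constant 0x10 sets EVEX.b to enable static rounding. A rough sketch,
// assuming the package exports rounding-mode constants (name hypothetical):
//
//     p.VFNMSUB132SD(RN_SAE, XMM1, XMM2, XMM0)
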
// VFNMSUB132SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB132SS
// Supported forms : (5 forms)
//
// * VFNMSUB132SS xmm, xmm, xmm [FMA3]
// * VFNMSUB132SS m32, xmm, xmm [FMA3]
// * VFNMSUB132SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB132SS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB132SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB132SS takes 3 or 4 operands")
    }
    // VFNMSUB132SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB132SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB132SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x9f)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB132SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB132SS")
    }
    return p
}

// VFNMSUB213PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB213PD
// Supported forms : (11 forms)
//
// * VFNMSUB213PD xmm, xmm, xmm [FMA3]
// * VFNMSUB213PD m128, xmm, xmm [FMA3]
// * VFNMSUB213PD ymm, ymm, ymm [FMA3]
// * VFNMSUB213PD m256, ymm, ymm [FMA3]
// * VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB213PD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB213PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB213PD takes 3 or 4 operands")
    }
    // VFNMSUB213PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB213PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB213PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMSUB213PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMSUB213PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB213PD")
    }
    return p
}

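// NOTE: the 132/213/231 suffixes all denote a negated fused multiply-subtract,
// -(a*b) - c; the digits only say which operands feed the multiply and which
// one is subtracted. In Intel operand order (dst, src2, src3):
//
//     VFNMSUB132: dst = -(dst  * src3) - src2
//     VFNMSUB213: dst = -(src2 * dst ) - src3
//     VFNMSUB231: dst = -(src2 * src3) - dst
//
// This package lists operands source-first, destination-last, which matches
// the encodings above: v[2] supplies the ModRM.reg destination in the
// three-operand forms.
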
// VFNMSUB213PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB213PS
// Supported forms : (11 forms)
//
// * VFNMSUB213PS xmm, xmm, xmm [FMA3]
// * VFNMSUB213PS m128, xmm, xmm [FMA3]
// * VFNMSUB213PS ymm, ymm, ymm [FMA3]
// * VFNMSUB213PS m256, ymm, ymm [FMA3]
// * VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB213PS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB213PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB213PS takes 3 or 4 operands")
    }
    // VFNMSUB213PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB213PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB213PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMSUB213PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xae)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMSUB213PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xae)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB213PS")
    }
    return p
}

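// NOTE: the register-only EVEX branches expand the 4-byte EVEX prefix inline
// (0x62 followed by three prefix bytes built with XORs and ORs) rather than
// calling m.evex, since there is no memory operand to thread through. In the
// fourth byte, the constants 0x40, 0x20 and 0x00 select the 512-, 256- and
// 128-bit vector lengths (the EVEX.L'L field).
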
// VFNMSUB213SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB213SD
// Supported forms : (5 forms)
//
// * VFNMSUB213SD xmm, xmm, xmm [FMA3]
// * VFNMSUB213SD m64, xmm, xmm [FMA3]
// * VFNMSUB213SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB213SD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB213SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB213SD takes 3 or 4 operands")
    }
    // VFNMSUB213SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xaf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB213SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xaf)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB213SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB213SD")
    }
    return p
}

// VFNMSUB213SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB213SS
// Supported forms : (5 forms)
//
// * VFNMSUB213SS xmm, xmm, xmm [FMA3]
// * VFNMSUB213SS m32, xmm, xmm [FMA3]
// * VFNMSUB213SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB213SS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB213SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB213SS takes 3 or 4 operands")
    }
    // VFNMSUB213SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB213SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xaf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB213SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xaf)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB213SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xaf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB213SS")
    }
    return p
}

// VFNMSUB231PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB231PD
// Supported forms : (11 forms)
//
// * VFNMSUB231PD xmm, xmm, xmm [FMA3]
// * VFNMSUB231PD m128, xmm, xmm [FMA3]
// * VFNMSUB231PD ymm, ymm, ymm [FMA3]
// * VFNMSUB231PD m256, ymm, ymm [FMA3]
// * VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB231PD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB231PD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB231PD takes 3 or 4 operands")
    }
    // VFNMSUB231PD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB231PD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB231PD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMSUB231PD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMSUB231PD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB231PD")
    }
    return p
}

// VFNMSUB231PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB231PS
// Supported forms : (11 forms)
//
// * VFNMSUB231PS xmm, xmm, xmm [FMA3]
// * VFNMSUB231PS m128, xmm, xmm [FMA3]
// * VFNMSUB231PS ymm, ymm, ymm [FMA3]
// * VFNMSUB231PS m256, ymm, ymm [FMA3]
// * VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VFNMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VFNMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB231PS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB231PS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB231PS takes 3 or 4 operands")
    }
    // VFNMSUB231PS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB231PS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB231PS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VFNMSUB231PS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xbe)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VFNMSUB231PS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xbe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB231PS")
    }
    return p
}

// VFNMSUB231SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB231SD
// Supported forms : (5 forms)
//
// * VFNMSUB231SD xmm, xmm, xmm [FMA3]
// * VFNMSUB231SD m64, xmm, xmm [FMA3]
// * VFNMSUB231SD m64, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB231SD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB231SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB231SD takes 3 or 4 operands")
    }
    // VFNMSUB231SD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231SD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB231SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xbf)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB231SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB231SD")
    }
    return p
}

// VFNMSUB231SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUB231SS
// Supported forms : (5 forms)
//
// * VFNMSUB231SS xmm, xmm, xmm [FMA3]
// * VFNMSUB231SS m32, xmm, xmm [FMA3]
// * VFNMSUB231SS m32, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VFNMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VFNMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VFNMSUB231SS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VFNMSUB231SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VFNMSUB231SS takes 3 or 4 operands")
    }
    // VFNMSUB231SS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VFNMSUB231SS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_FMA3)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xbf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VFNMSUB231SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xbf)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VFNMSUB231SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xbf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUB231SS")
    }
    return p
}

// VFNMSUBPD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUBPD
// Supported forms : (6 forms)
//
// * VFNMSUBPD xmm, xmm, xmm, xmm [FMA4]
// * VFNMSUBPD m128, xmm, xmm, xmm [FMA4]
// * VFNMSUBPD xmm, m128, xmm, xmm [FMA4]
// * VFNMSUBPD ymm, ymm, ymm, ymm [FMA4]
// * VFNMSUBPD m256, ymm, ymm, ymm [FMA4]
// * VFNMSUBPD ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFNMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMSUBPD", 4, Operands { v0, v1, v2, v3 })
    // VFNMSUBPD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBPD m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7d)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMSUBPD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7d)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBPD ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBPD m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7d)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMSUBPD ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7d)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUBPD")
    }
    return p
}

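// NOTE: for the FMA4 register-register forms, two alternative encodings are
// registered per operand pattern: VEX.W selects which of the two swappable
// sources is taken from ModRM.rm and which from the high nibble of the
// trailing is4 immediate byte. The paired p.add calls above differ only in
// that swap (v[0] versus v[1] in ModRM.rm and in hlcode(...) << 4); the
// memory forms admit a single encoding, since the memory operand must occupy
// ModRM.rm.
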
// VFNMSUBPS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUBPS
// Supported forms : (6 forms)
//
// * VFNMSUBPS xmm, xmm, xmm, xmm [FMA4]
// * VFNMSUBPS m128, xmm, xmm, xmm [FMA4]
// * VFNMSUBPS xmm, m128, xmm, xmm [FMA4]
// * VFNMSUBPS ymm, ymm, ymm, ymm [FMA4]
// * VFNMSUBPS m256, ymm, ymm, ymm [FMA4]
// * VFNMSUBPS ymm, m256, ymm, ymm [FMA4]
//
func (self *Program) VFNMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMSUBPS", 4, Operands { v0, v1, v2, v3 })
    // VFNMSUBPS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBPS m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7c)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMSUBPS xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBPS ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBPS m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7c)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMSUBPS ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUBPS")
    }
    return p
}

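// Usage note: the FMA4 builders above all follow the operand order given
// under "Supported forms", and each matching form registers one or more
// candidate encodings through p.add (register-register forms get two
// equivalent VEX encodings). A minimal sketch, assuming the XMM0..XMM3
// register constants defined elsewhere in this package (the helper name is
// purely illustrative):
//
//     func emitVFNMSUBPS(p *Program) *Instruction {
//         // "VFNMSUBPS xmm, xmm, xmm, xmm" form; panics if the FMA4
//         // operand checks fail.
//         return p.VFNMSUBPS(XMM0, XMM1, XMM2, XMM3)
//     }
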
// VFNMSUBSD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUBSD
// Supported forms : (3 forms)
//
// * VFNMSUBSD xmm, xmm, xmm, xmm [FMA4]
// * VFNMSUBSD m64, xmm, xmm, xmm [FMA4]
// * VFNMSUBSD xmm, m64, xmm, xmm [FMA4]
//
func (self *Program) VFNMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMSUBSD", 4, Operands { v0, v1, v2, v3 })
    // VFNMSUBSD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBSD m64, xmm, xmm, xmm
    if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7f)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMSUBSD xmm, m64, xmm, xmm
    if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUBSD")
    }
    return p
}

// VFNMSUBSS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VFNMSUBSS
// Supported forms : (3 forms)
//
// * VFNMSUBSS xmm, xmm, xmm, xmm [FMA4]
// * VFNMSUBSS m32, xmm, xmm, xmm [FMA4]
// * VFNMSUBSS xmm, m32, xmm, xmm [FMA4]
//
func (self *Program) VFNMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VFNMSUBSS", 4, Operands { v0, v1, v2, v3 })
    // VFNMSUBSS xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[2]) << 3))
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VFNMSUBSS m32, xmm, xmm, xmm
    if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0x7e)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VFNMSUBSS xmm, m32, xmm, xmm
    if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_FMA4)
        p.domain = DomainFMA
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x7e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFNMSUBSS")
    }
    return p
}

// VFPCLASSPD performs "Test Class of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VFPCLASSPD
// Supported forms : (6 forms)
//
// * VFPCLASSPD imm8, m512/m64bcst, k{k} [AVX512DQ]
// * VFPCLASSPD imm8, zmm, k{k} [AVX512DQ]
// * VFPCLASSPD imm8, m128/m64bcst, k{k} [AVX512DQ,AVX512VL]
// * VFPCLASSPD imm8, m256/m64bcst, k{k} [AVX512DQ,AVX512VL]
// * VFPCLASSPD imm8, xmm, k{k} [AVX512DQ,AVX512VL]
// * VFPCLASSPD imm8, ymm, k{k} [AVX512DQ,AVX512VL]
//
func (self *Program) VFPCLASSPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VFPCLASSPD", 3, Operands { v0, v1, v2 })
    // VFPCLASSPD imm8, m512/m64bcst, k{k}
    if isImm8(v0) && isM512M64bcst(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPD imm8, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit(kcode(v[2]) | 0x48)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPD imm8, m128/m64bcst, k{k}
    if isImm8(v0) && isM128M64bcst(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPD imm8, m256/m64bcst, k{k}
    if isImm8(v0) && isM256M64bcst(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPD imm8, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit(kcode(v[2]) | 0x08)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPD imm8, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit(kcode(v[2]) | 0x28)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFPCLASSPD")
    }
    return p
}

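// Usage note: VFPCLASSPD writes its per-lane class-test results into an
// opmask register, which is why the destination predicate is isKk rather
// than a vector check. A minimal sketch, assuming the K1 and ZMM5 register
// constants defined elsewhere in this package; imm8 0x81 sets the QNaN and
// SNaN class bits, i.e. "test for any NaN":
//
//     p.VFPCLASSPD(0x81, ZMM5, K1) // "VFPCLASSPD imm8, zmm, k{k}" form
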
// VFPCLASSPS performs "Test Class of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VFPCLASSPS
// Supported forms : (6 forms)
//
// * VFPCLASSPS imm8, m512/m32bcst, k{k} [AVX512DQ]
// * VFPCLASSPS imm8, zmm, k{k} [AVX512DQ]
// * VFPCLASSPS imm8, m128/m32bcst, k{k} [AVX512DQ,AVX512VL]
// * VFPCLASSPS imm8, m256/m32bcst, k{k} [AVX512DQ,AVX512VL]
// * VFPCLASSPS imm8, xmm, k{k} [AVX512DQ,AVX512VL]
// * VFPCLASSPS imm8, ymm, k{k} [AVX512DQ,AVX512VL]
//
func (self *Program) VFPCLASSPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VFPCLASSPS", 3, Operands { v0, v1, v2 })
    // VFPCLASSPS imm8, m512/m32bcst, k{k}
    if isImm8(v0) && isM512M32bcst(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPS imm8, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit(kcode(v[2]) | 0x48)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPS imm8, m128/m32bcst, k{k}
    if isImm8(v0) && isM128M32bcst(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPS imm8, m256/m32bcst, k{k}
    if isImm8(v0) && isM256M32bcst(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPS imm8, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit(kcode(v[2]) | 0x08)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSPS imm8, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit(kcode(v[2]) | 0x28)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFPCLASSPS")
    }
    return p
}

// VFPCLASSSD performs "Test Class of Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VFPCLASSSD
// Supported forms : (2 forms)
//
// * VFPCLASSSD imm8, xmm, k{k} [AVX512DQ]
// * VFPCLASSSD imm8, m64, k{k} [AVX512DQ]
//
func (self *Program) VFPCLASSSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VFPCLASSSD", 3, Operands { v0, v1, v2 })
    // VFPCLASSSD imm8, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit(kcode(v[2]) | 0x08)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSSD imm8, m64, k{k}
    if isImm8(v0) && isM64(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0)
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFPCLASSSD")
    }
    return p
}

// VFPCLASSSS performs "Test Class of Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VFPCLASSSS
// Supported forms : (2 forms)
//
// * VFPCLASSSS imm8, xmm, k{k} [AVX512DQ]
// * VFPCLASSSS imm8, m32, k{k} [AVX512DQ]
//
func (self *Program) VFPCLASSSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VFPCLASSSS", 3, Operands { v0, v1, v2 })
    // VFPCLASSSS imm8, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit(kcode(v[2]) | 0x08)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VFPCLASSSS imm8, m32, k{k}
    if isImm8(v0) && isM32(v1) && isKk(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0)
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFPCLASSSS")
    }
    return p
}

// VFRCZPD performs "Extract Fraction Packed Double-Precision Floating-Point".
//
// Mnemonic : VFRCZPD
// Supported forms : (4 forms)
//
// * VFRCZPD xmm, xmm [XOP]
// * VFRCZPD m128, xmm [XOP]
// * VFRCZPD ymm, ymm [XOP]
// * VFRCZPD m256, ymm [XOP]
//
func (self *Program) VFRCZPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VFRCZPD", 2, Operands { v0, v1 })
    // VFRCZPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0x81)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VFRCZPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x81)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VFRCZPD ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7c)
            m.emit(0x81)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VFRCZPD m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x81)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFRCZPD")
    }
    return p
}

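// Usage note: the VFRCZ* family is AMD XOP, so these encoders start from the
// 0x8f XOP escape byte instead of the 0xc4 VEX prefix used elsewhere in this
// file, and they gate on ISA_XOP with DomainAMDSpecific. A minimal sketch,
// assuming the XMM0/XMM1 register constants defined elsewhere in this
// package:
//
//     p.VFRCZPD(XMM0, XMM1) // "VFRCZPD xmm, xmm" form: source, then destination
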
// VFRCZPS performs "Extract Fraction Packed Single-Precision Floating-Point".
//
// Mnemonic : VFRCZPS
// Supported forms : (4 forms)
//
// * VFRCZPS xmm, xmm [XOP]
// * VFRCZPS m128, xmm [XOP]
// * VFRCZPS ymm, ymm [XOP]
// * VFRCZPS m256, ymm [XOP]
//
func (self *Program) VFRCZPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VFRCZPS", 2, Operands { v0, v1 })
    // VFRCZPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0x80)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VFRCZPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x80)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VFRCZPS ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7c)
            m.emit(0x80)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VFRCZPS m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x80)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFRCZPS")
    }
    return p
}

// VFRCZSD performs "Extract Fraction Scalar Double-Precision Floating-Point".
//
// Mnemonic : VFRCZSD
// Supported forms : (2 forms)
//
// * VFRCZSD xmm, xmm [XOP]
// * VFRCZSD m64, xmm [XOP]
//
func (self *Program) VFRCZSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VFRCZSD", 2, Operands { v0, v1 })
    // VFRCZSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VFRCZSD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x83)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFRCZSD")
    }
    return p
}

// VFRCZSS performs "Extract Fraction Scalar Single-Precision Floating-Point".
//
// Mnemonic : VFRCZSS
// Supported forms : (2 forms)
//
// * VFRCZSS xmm, xmm [XOP]
// * VFRCZSS m32, xmm [XOP]
//
func (self *Program) VFRCZSS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VFRCZSS", 2, Operands { v0, v1 })
    // VFRCZSS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0x82)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VFRCZSS m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x82)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VFRCZSS")
    }
    return p
}

// VGATHERDPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices".
//
// Mnemonic : VGATHERDPD
// Supported forms : (5 forms)
//
// * VGATHERDPD xmm, vm32x, xmm [AVX2]
// * VGATHERDPD ymm, vm32x, ymm [AVX2]
// * VGATHERDPD vm32y, zmm{k} [AVX512F]
// * VGATHERDPD vm32x, xmm{k} [AVX512F,AVX512VL]
// * VGATHERDPD vm32x, ymm{k} [AVX512F,AVX512VL]
//
func (self *Program) VGATHERDPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGATHERDPD", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VGATHERDPD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VGATHERDPD takes 2 or 3 operands")
    }
    // VGATHERDPD xmm, vm32x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x92)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERDPD ymm, vm32x, ymm
    if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x92)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERDPD vm32y, zmm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x92)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VGATHERDPD vm32x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x92)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VGATHERDPD vm32x, ymm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x92)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERDPD")
    }
    return p
}

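// Usage note: VGATHERDPD is variadic because the AVX2 and AVX-512 encodings
// disagree on operand count: the AVX2 forms take an explicit xmm/ymm mask as
// the first operand, while the EVEX forms fold masking into the destination.
// A sketch of both call shapes; how the vector-indexed memory operand (vm32x
// and so on) is constructed is not shown in this file, so vm below is only a
// placeholder for such a value:
//
//     p.VGATHERDPD(XMM1, vm, XMM2) // AVX2 form: mask, vm32x, destination
//     p.VGATHERDPD(vm, ZMM3)       // AVX-512 form: vm32y, zmm{k} destination
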
// VGATHERDPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices".
//
// Mnemonic : VGATHERDPS
// Supported forms : (5 forms)
//
// * VGATHERDPS xmm, vm32x, xmm [AVX2]
// * VGATHERDPS ymm, vm32y, ymm [AVX2]
// * VGATHERDPS vm32z, zmm{k} [AVX512F]
// * VGATHERDPS vm32x, xmm{k} [AVX512F,AVX512VL]
// * VGATHERDPS vm32y, ymm{k} [AVX512F,AVX512VL]
//
func (self *Program) VGATHERDPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGATHERDPS", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VGATHERDPS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VGATHERDPS takes 2 or 3 operands")
    }
    // VGATHERDPS xmm, vm32x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x92)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERDPS ymm, vm32y, ymm
    if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x92)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERDPS vm32z, zmm{k}
    if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x92)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VGATHERDPS vm32x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x92)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VGATHERDPS vm32y, ymm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x92)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERDPS")
    }
    return p
}

// VGATHERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint".
//
// Mnemonic : VGATHERPF0DPD
// Supported forms : (1 form)
//
// * VGATHERPF0DPD vm32y{k} [AVX512PF]
//
func (self *Program) VGATHERPF0DPD(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF0DPD", 1, Operands { v0 })
    // VGATHERPF0DPD vm32y{k}
    if isVMYk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(1, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF0DPD")
    }
    return p
}

// VGATHERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint".
//
// Mnemonic : VGATHERPF0DPS
// Supported forms : (1 form)
//
// * VGATHERPF0DPS vm32z{k} [AVX512PF]
//
func (self *Program) VGATHERPF0DPS(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF0DPS", 1, Operands { v0 })
    // VGATHERPF0DPS vm32z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(1, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF0DPS")
    }
    return p
}

// VGATHERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint".
//
// Mnemonic : VGATHERPF0QPD
// Supported forms : (1 form)
//
// * VGATHERPF0QPD vm64z{k} [AVX512PF]
//
func (self *Program) VGATHERPF0QPD(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF0QPD", 1, Operands { v0 })
    // VGATHERPF0QPD vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(1, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF0QPD")
    }
    return p
}

// VGATHERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint".
//
// Mnemonic : VGATHERPF0QPS
// Supported forms : (1 form)
//
// * VGATHERPF0QPS vm64z{k} [AVX512PF]
//
func (self *Program) VGATHERPF0QPS(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF0QPS", 1, Operands { v0 })
    // VGATHERPF0QPS vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(1, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF0QPS")
    }
    return p
}

// VGATHERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint".
//
// Mnemonic : VGATHERPF1DPD
// Supported forms : (1 form)
//
// * VGATHERPF1DPD vm32y{k} [AVX512PF]
//
func (self *Program) VGATHERPF1DPD(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF1DPD", 1, Operands { v0 })
    // VGATHERPF1DPD vm32y{k}
    if isVMYk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(2, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF1DPD")
    }
    return p
}

// VGATHERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint".
//
// Mnemonic : VGATHERPF1DPS
// Supported forms : (1 form)
//
// * VGATHERPF1DPS vm32z{k} [AVX512PF]
//
func (self *Program) VGATHERPF1DPS(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF1DPS", 1, Operands { v0 })
    // VGATHERPF1DPS vm32z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(2, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF1DPS")
    }
    return p
}

// VGATHERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint".
//
// Mnemonic : VGATHERPF1QPD
// Supported forms : (1 form)
//
// * VGATHERPF1QPD vm64z{k} [AVX512PF]
//
func (self *Program) VGATHERPF1QPD(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF1QPD", 1, Operands { v0 })
    // VGATHERPF1QPD vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(2, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF1QPD")
    }
    return p
}

// VGATHERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint".
//
// Mnemonic : VGATHERPF1QPS
// Supported forms : (1 form)
//
// * VGATHERPF1QPS vm64z{k} [AVX512PF]
//
func (self *Program) VGATHERPF1QPS(v0 interface{}) *Instruction {
    p := self.alloc("VGATHERPF1QPS", 1, Operands { v0 })
    // VGATHERPF1QPS vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(2, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERPF1QPS")
    }
    return p
}

// VGATHERQPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices".
//
// Mnemonic : VGATHERQPD
// Supported forms : (5 forms)
//
// * VGATHERQPD xmm, vm64x, xmm [AVX2]
// * VGATHERQPD ymm, vm64y, ymm [AVX2]
// * VGATHERQPD vm64z, zmm{k} [AVX512F]
// * VGATHERQPD vm64x, xmm{k} [AVX512F,AVX512VL]
// * VGATHERQPD vm64y, ymm{k} [AVX512F,AVX512VL]
//
func (self *Program) VGATHERQPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGATHERQPD", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VGATHERQPD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VGATHERQPD takes 2 or 3 operands")
    }
    // VGATHERQPD xmm, vm64x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x93)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERQPD ymm, vm64y, ymm
    if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x93)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERQPD vm64z, zmm{k}
    if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x93)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VGATHERQPD vm64x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x93)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VGATHERQPD vm64y, ymm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x93)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERQPD")
    }
    return p
}

// VGATHERQPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices".
//
// Mnemonic : VGATHERQPS
// Supported forms : (5 forms)
//
// * VGATHERQPS xmm, vm64x, xmm [AVX2]
// * VGATHERQPS xmm, vm64y, xmm [AVX2]
// * VGATHERQPS vm64z, ymm{k} [AVX512F]
// * VGATHERQPS vm64x, xmm{k} [AVX512F,AVX512VL]
// * VGATHERQPS vm64y, xmm{k} [AVX512F,AVX512VL]
//
func (self *Program) VGATHERQPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGATHERQPS", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VGATHERQPS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VGATHERQPS takes 2 or 3 operands")
    }
    // VGATHERQPS xmm, vm64x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x93)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERQPS xmm, vm64y, xmm
    if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x93)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VGATHERQPS vm64z, ymm{k}
    if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x93)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VGATHERQPS vm64x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x93)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VGATHERQPS vm64y, xmm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x93)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGATHERQPS")
    }
    return p
}

// VGETEXPPD performs "Extract Exponents of Packed Double-Precision Floating-Point Values as Double-Precision Floating-Point Values".
//
// Mnemonic : VGETEXPPD
// Supported forms : (7 forms)
//
// * VGETEXPPD m512/m64bcst, zmm{k}{z} [AVX512F]
// * VGETEXPPD {sae}, zmm, zmm{k}{z} [AVX512F]
// * VGETEXPPD zmm, zmm{k}{z} [AVX512F]
// * VGETEXPPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETEXPPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VGETEXPPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETEXPPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VGETEXPPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGETEXPPD", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VGETEXPPD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VGETEXPPD takes 2 or 3 operands")
    }
    // VGETEXPPD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VGETEXPPD {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VGETEXPPD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VGETEXPPD m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VGETEXPPD m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VGETEXPPD xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VGETEXPPD ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETEXPPD")
    }
    return p
}

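// Usage note: VGETEXPPD also dispatches on operand count, folding the plain
// two-operand forms and the "{sae}, zmm, zmm{k}{z}" form into one variadic
// builder. A sketch, assuming the package exposes a suppress-all-exceptions
// marker accepted by isSAE (called SAE here) and ZMM register constants
// defined elsewhere:
//
//     p.VGETEXPPD(ZMM1, ZMM2)      // "zmm, zmm{k}{z}" form
//     p.VGETEXPPD(SAE, ZMM1, ZMM2) // "{sae}, zmm, zmm{k}{z}" form
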
// VGETEXPPS performs "Extract Exponents of Packed Single-Precision Floating-Point Values as Single-Precision Floating-Point Values".
//
// Mnemonic : VGETEXPPS
// Supported forms : (7 forms)
//
// * VGETEXPPS m512/m32bcst, zmm{k}{z} [AVX512F]
// * VGETEXPPS {sae}, zmm, zmm{k}{z} [AVX512F]
// * VGETEXPPS zmm, zmm{k}{z} [AVX512F]
// * VGETEXPPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETEXPPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VGETEXPPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETEXPPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VGETEXPPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGETEXPPS", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VGETEXPPS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VGETEXPPS takes 2 or 3 operands")
    }
    // VGETEXPPS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VGETEXPPS {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VGETEXPPS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VGETEXPPS m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VGETEXPPS m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x42)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VGETEXPPS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VGETEXPPS ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETEXPPS")
    }
    return p
}

// VGETEXPSD performs "Extract Exponent of Scalar Double-Precision Floating-Point Value as Double-Precision Floating-Point Value".
//
// Mnemonic : VGETEXPSD
// Supported forms : (3 forms)
//
// * VGETEXPSD m64, xmm, xmm{k}{z} [AVX512F]
// * VGETEXPSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VGETEXPSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VGETEXPSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGETEXPSD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VGETEXPSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VGETEXPSD takes 3 or 4 operands")
    }
    // VGETEXPSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x43)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VGETEXPSD {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VGETEXPSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETEXPSD")
    }
    return p
}

// VGETEXPSS performs "Extract Exponent of Scalar Single-Precision Floating-Point Value as Single-Precision Floating-Point Value".
//
// Mnemonic : VGETEXPSS
// Supported forms : (3 forms)
//
// * VGETEXPSS m32, xmm, xmm{k}{z} [AVX512F]
// * VGETEXPSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VGETEXPSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VGETEXPSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGETEXPSS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VGETEXPSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VGETEXPSS takes 3 or 4 operands")
    }
    // VGETEXPSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x43)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VGETEXPSS {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VGETEXPSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETEXPSS")
    }
    return p
}

// VGETMANTPD performs "Extract Normalized Mantissas from Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VGETMANTPD
// Supported forms : (7 forms)
//
// * VGETMANTPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VGETMANTPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
// * VGETMANTPD imm8, zmm, zmm{k}{z} [AVX512F]
// * VGETMANTPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETMANTPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VGETMANTPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETMANTPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VGETMANTPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VGETMANTPD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VGETMANTPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VGETMANTPD takes 3 or 4 operands")
    }
    // VGETMANTPD imm8, m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPD imm8, {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPD imm8, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPD imm8, m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPD imm8, m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPD imm8, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPD imm8, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETMANTPD")
    }
    return p
}

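// Usage note: every builder in this file panics when no form matches, via the
// final "if p.len == 0" check. Callers assembling untrusted operand lists can
// translate that panic into an error with a small wrapper; a sketch in plain
// Go (fmt would need to be imported where this lives):
//
//     func tryVGETMANTPD(p *Program, v0, v1, v2 interface{}) (ins *Instruction, err error) {
//         defer func() {
//             if r := recover(); r != nil {
//                 err = fmt.Errorf("VGETMANTPD: %v", r)
//             }
//         }()
//         return p.VGETMANTPD(v0, v1, v2), nil
//     }
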
// VGETMANTPS performs "Extract Normalized Mantissas from Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VGETMANTPS
// Supported forms : (7 forms)
//
// * VGETMANTPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VGETMANTPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
// * VGETMANTPS imm8, zmm, zmm{k}{z} [AVX512F]
// * VGETMANTPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETMANTPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VGETMANTPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VGETMANTPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VGETMANTPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VGETMANTPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VGETMANTPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VGETMANTPS takes 3 or 4 operands")
    }
    // VGETMANTPS imm8, m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPS imm8, {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPS imm8, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPS imm8, m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPS imm8, m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPS imm8, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTPS imm8, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETMANTPS")
    }
    return p
}

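// Usage sketch (added commentary, not part of the generated encoder): the
// trailing variadic parameter exists only to accept the optional {sae}
// operand, so VGETMANTPS is called with either 3 or 4 operands. Assuming a
// *Program value p and that ZMM register values and an {sae} operand are
// exported by this package under the names used below (an assumption, not
// a verified API):
//
//     p.VGETMANTPS(0x04, ZMM2, ZMM1)      // imm8, zmm, zmm{k}{z} form
//     p.VGETMANTPS(0x04, SAE, ZMM2, ZMM1) // imm8, {sae}, zmm, zmm{k}{z} form
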
// VGETMANTSD performs "Extract Normalized Mantissa from Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VGETMANTSD
// Supported forms : (3 forms)
//
// * VGETMANTSD imm8, m64, xmm, xmm{k}{z} [AVX512F]
// * VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VGETMANTSD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VGETMANTSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VGETMANTSD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VGETMANTSD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VGETMANTSD takes 4 or 5 operands")
    }
    // VGETMANTSD imm8, m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x27)
            m.mrsd(lcode(v[3]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTSD imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETMANTSD")
    }
    return p
}

// VGETMANTSS performs "Extract Normalized Mantissa from Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VGETMANTSS
// Supported forms : (3 forms)
//
// * VGETMANTSS imm8, m32, xmm, xmm{k}{z} [AVX512F]
// * VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VGETMANTSS imm8, xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VGETMANTSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VGETMANTSS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VGETMANTSS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VGETMANTSS takes 4 or 5 operands")
    }
    // VGETMANTSS imm8, m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x27)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VGETMANTSS imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VGETMANTSS")
    }
    return p
}

// VHADDPD performs "Packed Double-FP Horizontal Add".
//
// Mnemonic : VHADDPD
// Supported forms : (4 forms)
//
// * VHADDPD xmm, xmm, xmm [AVX]
// * VHADDPD m128, xmm, xmm [AVX]
// * VHADDPD ymm, ymm, ymm [AVX]
// * VHADDPD m256, ymm, ymm [AVX]
//
func (self *Program) VHADDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VHADDPD", 3, Operands { v0, v1, v2 })
    // VHADDPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHADDPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VHADDPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHADDPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VHADDPD")
    }
    return p
}

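// Semantics note with a small worked example (added commentary, not part of
// the generated output): in the encodings above the destination is the last
// operand, the VEX.vvvv source is the middle operand, and the ModRM source
// is the first. For the xmm form that means a call such as
//
//     p.VHADDPD(XMM2, XMM1, XMM0) // register names assumed exports
//
// should place the sum of the two 64-bit lanes of XMM1 in the low half of
// XMM0 and the sum of the two lanes of XMM2 in its high half, matching the
// Intel HADDPD definition; this operand-to-lane mapping is inferred from
// the encoding and worth verifying against the SDM before relying on it.
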
// VHADDPS performs "Packed Single-FP Horizontal Add".
//
// Mnemonic : VHADDPS
// Supported forms : (4 forms)
//
// * VHADDPS xmm, xmm, xmm [AVX]
// * VHADDPS m128, xmm, xmm [AVX]
// * VHADDPS ymm, ymm, ymm [AVX]
// * VHADDPS m256, ymm, ymm [AVX]
//
func (self *Program) VHADDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VHADDPS", 3, Operands { v0, v1, v2 })
    // VHADDPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHADDPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VHADDPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHADDPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VHADDPS")
    }
    return p
}

// VHSUBPD performs "Packed Double-FP Horizontal Subtract".
//
// Mnemonic : VHSUBPD
// Supported forms : (4 forms)
//
// * VHSUBPD xmm, xmm, xmm [AVX]
// * VHSUBPD m128, xmm, xmm [AVX]
// * VHSUBPD ymm, ymm, ymm [AVX]
// * VHSUBPD m256, ymm, ymm [AVX]
//
func (self *Program) VHSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VHSUBPD", 3, Operands { v0, v1, v2 })
    // VHSUBPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHSUBPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VHSUBPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHSUBPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VHSUBPD")
    }
    return p
}

// VHSUBPS performs "Packed Single-FP Horizontal Subtract".
//
// Mnemonic : VHSUBPS
// Supported forms : (4 forms)
//
// * VHSUBPS xmm, xmm, xmm [AVX]
// * VHSUBPS m128, xmm, xmm [AVX]
// * VHSUBPS ymm, ymm, ymm [AVX]
// * VHSUBPS m256, ymm, ymm [AVX]
//
func (self *Program) VHSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VHSUBPS", 3, Operands { v0, v1, v2 })
    // VHSUBPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHSUBPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VHSUBPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VHSUBPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VHSUBPS")
    }
    return p
}

// VINSERTF128 performs "Insert Packed Floating-Point Values".
//
// Mnemonic : VINSERTF128
// Supported forms : (2 forms)
//
// * VINSERTF128 imm8, xmm, ymm, ymm [AVX]
// * VINSERTF128 imm8, m128, ymm, ymm [AVX]
//
func (self *Program) VINSERTF128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTF128", 4, Operands { v0, v1, v2, v3 })
    // VINSERTF128 imm8, xmm, ymm, ymm
    if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF128 imm8, m128, ymm, ymm
    if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x18)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTF128")
    }
    return p
}

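// Usage sketch (added commentary; register names are assumed exports of
// this package): the imm8 selects the destination 128-bit lane, so copying
// XMM1 into the upper half of YMM2 while keeping its lower half intact
// could look like
//
//     p.VINSERTF128(1, XMM1, YMM2, YMM2) // imm8=1 -> bits 255:128
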
// VINSERTF32X4 performs "Insert 128 Bits of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VINSERTF32X4
// Supported forms : (4 forms)
//
// * VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F]
// * VINSERTF32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F]
// * VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VINSERTF32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VINSERTF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTF32X4", 4, Operands { v0, v1, v2, v3 })
    // VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF32X4 imm8, m128, zmm, zmm{k}{z}
    if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF32X4 imm8, m128, ymm, ymm{k}{z}
    if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTF32X4")
    }
    return p
}

// VINSERTF32X8 performs "Insert 256 Bits of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VINSERTF32X8
// Supported forms : (2 forms)
//
// * VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ]
// * VINSERTF32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ]
//
func (self *Program) VINSERTF32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTF32X8", 4, Operands { v0, v1, v2, v3 })
    // VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x1a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF32X8 imm8, m256, zmm, zmm{k}{z}
    if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTF32X8")
    }
    return p
}

// VINSERTF64X2 performs "Insert 128 Bits of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VINSERTF64X2
// Supported forms : (4 forms)
//
// * VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ]
// * VINSERTF64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ]
// * VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VINSERTF64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VINSERTF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTF64X2", 4, Operands { v0, v1, v2, v3 })
    // VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF64X2 imm8, m128, zmm, zmm{k}{z}
    if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x18)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF64X2 imm8, m128, ymm, ymm{k}{z}
    if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x18)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTF64X2")
    }
    return p
}

// VINSERTF64X4 performs "Insert 256 Bits of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VINSERTF64X4
// Supported forms : (2 forms)
//
// * VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F]
// * VINSERTF64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F]
//
func (self *Program) VINSERTF64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTF64X4", 4, Operands { v0, v1, v2, v3 })
    // VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x1a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTF64X4 imm8, m256, zmm, zmm{k}{z}
    if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x1a)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTF64X4")
    }
    return p
}

// VINSERTI128 performs "Insert Packed Integer Values".
//
// Mnemonic : VINSERTI128
// Supported forms : (2 forms)
//
// * VINSERTI128 imm8, xmm, ymm, ymm [AVX2]
// * VINSERTI128 imm8, m128, ymm, ymm [AVX2]
//
func (self *Program) VINSERTI128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTI128", 4, Operands { v0, v1, v2, v3 })
    // VINSERTI128 imm8, xmm, ymm, ymm
    if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI128 imm8, m128, ymm, ymm
    if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x38)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTI128")
    }
    return p
}

// VINSERTI32X4 performs "Insert 128 Bits of Packed Doubleword Integer Values".
//
// Mnemonic : VINSERTI32X4
// Supported forms : (4 forms)
//
// * VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F]
// * VINSERTI32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F]
// * VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VINSERTI32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VINSERTI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTI32X4", 4, Operands { v0, v1, v2, v3 })
    // VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI32X4 imm8, m128, zmm, zmm{k}{z}
    if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI32X4 imm8, m128, ymm, ymm{k}{z}
    if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTI32X4")
    }
    return p
}

// VINSERTI32X8 performs "Insert 256 Bits of Packed Doubleword Integer Values".
//
// Mnemonic : VINSERTI32X8
// Supported forms : (2 forms)
//
// * VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ]
// * VINSERTI32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ]
//
func (self *Program) VINSERTI32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTI32X8", 4, Operands { v0, v1, v2, v3 })
    // VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI32X8 imm8, m256, zmm, zmm{k}{z}
    if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x3a)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTI32X8")
    }
    return p
}

// VINSERTI64X2 performs "Insert 128 Bits of Packed Quadword Integer Values".
//
// Mnemonic : VINSERTI64X2
// Supported forms : (4 forms)
//
// * VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ]
// * VINSERTI64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ]
// * VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VINSERTI64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VINSERTI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTI64X2", 4, Operands { v0, v1, v2, v3 })
    // VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI64X2 imm8, m128, zmm, zmm{k}{z}
    if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI64X2 imm8, m128, ymm, ymm{k}{z}
    if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTI64X2")
    }
    return p
}

// VINSERTI64X4 performs "Insert 256 Bits of Packed Quadword Integer Values".
//
// Mnemonic : VINSERTI64X4
// Supported forms : (2 forms)
//
// * VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F]
// * VINSERTI64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F]
//
func (self *Program) VINSERTI64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTI64X4", 4, Operands { v0, v1, v2, v3 })
    // VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTI64X4 imm8, m256, zmm, zmm{k}{z}
    if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x3a)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTI64X4")
    }
    return p
}

// VINSERTPS performs "Insert Packed Single Precision Floating-Point Value".
//
// Mnemonic : VINSERTPS
// Supported forms : (4 forms)
//
// * VINSERTPS imm8, xmm, xmm, xmm [AVX]
// * VINSERTPS imm8, m32, xmm, xmm [AVX]
// * VINSERTPS imm8, xmm, xmm, xmm [AVX512F]
// * VINSERTPS imm8, m32, xmm, xmm [AVX512F]
//
func (self *Program) VINSERTPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VINSERTPS", 4, Operands { v0, v1, v2, v3 })
    // VINSERTPS imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTPS imm8, m32, xmm, xmm
    if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x21)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTPS imm8, xmm, xmm, xmm
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VINSERTPS imm8, m32, xmm, xmm
    if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x21)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VINSERTPS")
    }
    return p
}

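// Encoding-selection note (added commentary): the two register forms above
// look identical at the call site. The VEX (AVX) encoder applies when every
// operand satisfies isXMM, while the EVEX (AVX512F) encoder covers operands
// that only satisfy isEVEXXMM, such as the high-16 registers. Assuming the
// register names below are exported by this package, both calls request the
// same operation with different encodings:
//
//     p.VINSERTPS(0x10, XMM3, XMM2, XMM1)  // VEX form
//     p.VINSERTPS(0x10, XMM19, XMM2, XMM1) // EVEX form (high-16 register)
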
// VLDDQU performs "Load Unaligned Integer 128 Bits".
//
// Mnemonic : VLDDQU
// Supported forms : (2 forms)
//
// * VLDDQU m128, xmm [AVX]
// * VLDDQU m256, ymm [AVX]
//
func (self *Program) VLDDQU(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VLDDQU", 2, Operands { v0, v1 })
    // VLDDQU m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VLDDQU m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xf0)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VLDDQU")
    }
    return p
}

// VLDMXCSR performs "Load MXCSR Register".
//
// Mnemonic : VLDMXCSR
// Supported forms : (1 form)
//
// * VLDMXCSR m32 [AVX]
//
func (self *Program) VLDMXCSR(v0 interface{}) *Instruction {
    p := self.alloc("VLDMXCSR", 1, Operands { v0 })
    // VLDMXCSR m32
    if isM32(v0) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, addr(v[0]), 0)
            m.emit(0xae)
            m.mrsd(2, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VLDMXCSR")
    }
    return p
}

// VMASKMOVDQU performs "Store Selected Bytes of Double Quadword".
//
// Mnemonic : VMASKMOVDQU
// Supported forms : (1 form)
//
// * VMASKMOVDQU xmm, xmm [AVX]
//
func (self *Program) VMASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMASKMOVDQU", 2, Operands { v0, v1 })
    // VMASKMOVDQU xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0xf7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMASKMOVDQU")
    }
    return p
}

// VMASKMOVPD performs "Conditional Move Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VMASKMOVPD
// Supported forms : (4 forms)
//
// * VMASKMOVPD m128, xmm, xmm [AVX]
// * VMASKMOVPD m256, ymm, ymm [AVX]
// * VMASKMOVPD xmm, xmm, m128 [AVX]
// * VMASKMOVPD ymm, ymm, m256 [AVX]
//
func (self *Program) VMASKMOVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VMASKMOVPD", 3, Operands { v0, v1, v2 })
    // VMASKMOVPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMASKMOVPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMASKMOVPD xmm, xmm, m128
    if isXMM(v0) && isXMM(v1) && isM128(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x2f)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    // VMASKMOVPD ymm, ymm, m256
    if isYMM(v0) && isYMM(v1) && isM256(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x2f)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMASKMOVPD")
    }
    return p
}

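// Direction note (added commentary): the operand order selects load versus
// store, as the form list above shows. With m128 standing for a 128-bit
// memory operand built with this package's addressing helpers (construction
// omitted) and XMM names assumed exports:
//
//     p.VMASKMOVPD(m128, XMM1, XMM2) // masked load:  memory -> XMM2, mask in XMM1
//     p.VMASKMOVPD(XMM2, XMM1, m128) // masked store: XMM2 -> memory, mask in XMM1
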
// VMASKMOVPS performs "Conditional Move Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VMASKMOVPS
// Supported forms : (4 forms)
//
// * VMASKMOVPS m128, xmm, xmm [AVX]
// * VMASKMOVPS m256, ymm, ymm [AVX]
// * VMASKMOVPS xmm, xmm, m128 [AVX]
// * VMASKMOVPS ymm, ymm, m256 [AVX]
//
func (self *Program) VMASKMOVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VMASKMOVPS", 3, Operands { v0, v1, v2 })
    // VMASKMOVPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMASKMOVPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMASKMOVPS xmm, xmm, m128
    if isXMM(v0) && isXMM(v1) && isM128(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x2e)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    // VMASKMOVPS ymm, ymm, m256
    if isYMM(v0) && isYMM(v1) && isM256(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x2e)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMASKMOVPS")
    }
    return p
}

// VMAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VMAXPD
// Supported forms : (11 forms)
//
// * VMAXPD xmm, xmm, xmm [AVX]
// * VMAXPD m128, xmm, xmm [AVX]
// * VMAXPD ymm, ymm, ymm [AVX]
// * VMAXPD m256, ymm, ymm [AVX]
// * VMAXPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VMAXPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VMAXPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VMAXPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMAXPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMAXPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMAXPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMAXPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMAXPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMAXPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMAXPD takes 3 or 4 operands")
    }
    // VMAXPD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMAXPD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMAXPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VMAXPD {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMAXPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VMAXPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VMAXPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMAXPD")
    }
    return p
}

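// Usage sketch (added commentary; names below are assumed exports of this
// package): as with the other EVEX instructions in this file, the optional
// fourth operand only serves the {sae} form, which suppresses all
// floating-point exceptions for the operation:
//
//     p.VMAXPD(ZMM2, ZMM1, ZMM0)      // zmm, zmm, zmm{k}{z}
//     p.VMAXPD(SAE, ZMM2, ZMM1, ZMM0) // {sae}, zmm, zmm, zmm{k}{z}
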
// VMAXPS performs "Return Maximum Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VMAXPS
// Supported forms : (11 forms)
//
// * VMAXPS xmm, xmm, xmm [AVX]
// * VMAXPS m128, xmm, xmm [AVX]
// * VMAXPS ymm, ymm, ymm [AVX]
// * VMAXPS m256, ymm, ymm [AVX]
// * VMAXPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VMAXPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VMAXPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VMAXPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMAXPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMAXPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMAXPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMAXPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMAXPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMAXPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMAXPS takes 3 or 4 operands")
    }
    // VMAXPS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMAXPS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMAXPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VMAXPS {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMAXPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VMAXPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VMAXPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMAXPS")
    }
    return p
}

// VMAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VMAXSD
// Supported forms : (5 forms)
//
// * VMAXSD xmm, xmm, xmm [AVX]
// * VMAXSD m64, xmm, xmm [AVX]
// * VMAXSD m64, xmm, xmm{k}{z} [AVX512F]
// * VMAXSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VMAXSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMAXSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMAXSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMAXSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMAXSD takes 3 or 4 operands")
    }
    // VMAXSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMAXSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VMAXSD {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMAXSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMAXSD")
    }
    return p
}

// VMAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VMAXSS
// Supported forms : (5 forms)
//
// * VMAXSS xmm, xmm, xmm [AVX]
// * VMAXSS m32, xmm, xmm [AVX]
// * VMAXSS m32, xmm, xmm{k}{z} [AVX512F]
// * VMAXSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VMAXSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMAXSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMAXSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMAXSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMAXSS takes 3 or 4 operands")
    }
    // VMAXSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMAXSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMAXSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5f)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VMAXSS {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMAXSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMAXSS")
    }
    return p
}

// VMINPD performs "Return Minimum Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VMINPD
// Supported forms : (11 forms)
//
// * VMINPD xmm, xmm, xmm [AVX]
// * VMINPD m128, xmm, xmm [AVX]
// * VMINPD ymm, ymm, ymm [AVX]
// * VMINPD m256, ymm, ymm [AVX]
// * VMINPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VMINPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VMINPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VMINPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMINPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMINPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMINPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMINPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMINPD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VMINPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMINPD takes 3 or 4 operands")
    }
    // VMINPD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMINPD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMINPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VMINPD {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMINPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VMINPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VMINPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMINPD")
    }
    return p
}

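// Note (explanatory, not generated output): in the EVEX paths above, the last
// argument to m.mrsd is the compression factor N of Intel's disp8*N rule
// (16/32/64 for 128/256/512-bit memory operands, or the element size for the
// scalar and broadcast forms), while the VEX paths pass 1 because VEX encodings
// carry no compressed displacement. bcode(v[0]) likewise sets the EVEX
// broadcast bit for the m64bcst variants, replicating one double across lanes.
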
// VMINPS performs "Return Minimum Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VMINPS
// Supported forms : (11 forms)
//
// * VMINPS xmm, xmm, xmm [AVX]
// * VMINPS m128, xmm, xmm [AVX]
// * VMINPS ymm, ymm, ymm [AVX]
// * VMINPS m256, ymm, ymm [AVX]
// * VMINPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VMINPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VMINPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VMINPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMINPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMINPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMINPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMINPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMINPS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VMINPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMINPS takes 3 or 4 operands")
    }
    // VMINPS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMINPS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMINPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VMINPS {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMINPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VMINPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VMINPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMINPS")
    }
    return p
}

// VMINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VMINSD
// Supported forms : (5 forms)
//
// * VMINSD xmm, xmm, xmm [AVX]
// * VMINSD m64, xmm, xmm [AVX]
// * VMINSD m64, xmm, xmm{k}{z} [AVX512F]
// * VMINSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VMINSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMINSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMINSD", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VMINSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMINSD takes 3 or 4 operands")
    }
    // VMINSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMINSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VMINSD {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMINSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMINSD")
    }
    return p
}

// VMINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VMINSS
// Supported forms : (5 forms)
//
// * VMINSS xmm, xmm, xmm [AVX]
// * VMINSS m32, xmm, xmm [AVX]
// * VMINSS m32, xmm, xmm{k}{z} [AVX512F]
// * VMINSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VMINSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMINSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMINSS", 3, Operands { v0, v1, v2 })
        case 1  : p = self.alloc("VMINSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMINSS takes 3 or 4 operands")
    }
    // VMINSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMINSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMINSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5d)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VMINSS {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMINSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMINSS")
    }
    return p
}

// VMOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VMOVAPD
// Supported forms : (15 forms)
//
// * VMOVAPD xmm, xmm [AVX]
// * VMOVAPD m128, xmm [AVX]
// * VMOVAPD ymm, ymm [AVX]
// * VMOVAPD m256, ymm [AVX]
// * VMOVAPD xmm, m128 [AVX]
// * VMOVAPD ymm, m256 [AVX]
// * VMOVAPD zmm, m512{k}{z} [AVX512F]
// * VMOVAPD zmm, zmm{k}{z} [AVX512F]
// * VMOVAPD m512, zmm{k}{z} [AVX512F]
// * VMOVAPD xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VMOVAPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVAPD ymm, m256{k}{z} [AVX512F,AVX512VL]
// * VMOVAPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVAPD m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVAPD m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVAPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVAPD", 2, Operands { v0, v1 })
    // VMOVAPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), v[1], 0)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVAPD ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), v[1], 0)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPD m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVAPD xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVAPD ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVAPD zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVAPD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPD m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVAPD xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVAPD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPD ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVAPD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPD m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVAPD m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVAPD")
    }
    return p
}

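// Note (explanatory, not generated output): for the register-register forms of
// VMOVAPD, two alternative encodings are registered via p.add -- opcode 0x28
// with the destination in ModRM.reg, and opcode 0x29 with the destination in
// ModRM.rm. Both are architecturally equivalent, which leaves the encoder free
// to choose between them.
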
// VMOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VMOVAPS
// Supported forms : (15 forms)
//
// * VMOVAPS xmm, xmm [AVX]
// * VMOVAPS m128, xmm [AVX]
// * VMOVAPS ymm, ymm [AVX]
// * VMOVAPS m256, ymm [AVX]
// * VMOVAPS xmm, m128 [AVX]
// * VMOVAPS ymm, m256 [AVX]
// * VMOVAPS zmm, m512{k}{z} [AVX512F]
// * VMOVAPS zmm, zmm{k}{z} [AVX512F]
// * VMOVAPS m512, zmm{k}{z} [AVX512F]
// * VMOVAPS xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VMOVAPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVAPS ymm, m256{k}{z} [AVX512F,AVX512VL]
// * VMOVAPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVAPS m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVAPS m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVAPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVAPS", 2, Operands { v0, v1 })
    // VMOVAPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), v[1], 0)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVAPS ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[0]), v[1], 0)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPS m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVAPS xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVAPS ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVAPS zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVAPS zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPS m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVAPS xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVAPS xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPS ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x29)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVAPS ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVAPS m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVAPS m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x28)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVAPS")
    }
    return p
}

// VMOVD performs "Move Doubleword".
//
// Mnemonic : VMOVD
// Supported forms : (8 forms)
//
// * VMOVD xmm, r32 [AVX]
// * VMOVD r32, xmm [AVX]
// * VMOVD m32, xmm [AVX]
// * VMOVD xmm, m32 [AVX]
// * VMOVD xmm, r32 [AVX512F]
// * VMOVD r32, xmm [AVX512F]
// * VMOVD m32, xmm [AVX512F]
// * VMOVD xmm, m32 [AVX512F]
//
func (self *Program) VMOVD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVD", 2, Operands { v0, v1 })
    // VMOVD xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), v[1], 0)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVD r32, xmm
    if isReg32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVD xmm, m32
    if isXMM(v0) && isM32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVD xmm, r32
    if isEVEXXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVD r32, xmm
    if isReg32(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVD m32, xmm
    if isM32(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VMOVD xmm, m32
    if isEVEXXMM(v0) && isM32(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVD")
    }
    return p
}

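// Usage sketch (illustrative note, not generated output): VMOVD moves 32 bits
// between a general-purpose register (or m32) and the low doubleword of an XMM
// register; the XMM-destination load zeroes the upper XMM bits. Assuming the
// package's register constants (e.g. EAX, XMM0 -- names assumed here):
//
//	p.VMOVD(EAX, XMM0) // VMOVD r32, xmm : XMM0[31:0] = EAX, upper bits zeroed
//	p.VMOVD(XMM0, EAX) // VMOVD xmm, r32 : EAX = XMM0[31:0]
//
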
// VMOVDDUP performs "Move One Double-FP and Duplicate".
//
// Mnemonic : VMOVDDUP
// Supported forms : (10 forms)
//
// * VMOVDDUP xmm, xmm [AVX]
// * VMOVDDUP m64, xmm [AVX]
// * VMOVDDUP ymm, ymm [AVX]
// * VMOVDDUP m256, ymm [AVX]
// * VMOVDDUP zmm, zmm{k}{z} [AVX512F]
// * VMOVDDUP m512, zmm{k}{z} [AVX512F]
// * VMOVDDUP xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVDDUP ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVDDUP m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVDDUP m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVDDUP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDDUP", 2, Operands { v0, v1 })
    // VMOVDDUP xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), v[0], 0)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVDDUP m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVDDUP ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[1]), v[0], 0)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVDDUP m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVDDUP zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVDDUP m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDDUP xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVDDUP ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVDDUP m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VMOVDDUP m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDDUP")
    }
    return p
}

// VMOVDQA performs "Move Aligned Double Quadword".
//
// Mnemonic : VMOVDQA
// Supported forms : (6 forms)
//
// * VMOVDQA xmm, xmm [AVX]
// * VMOVDQA m128, xmm [AVX]
// * VMOVDQA ymm, ymm [AVX]
// * VMOVDQA m256, ymm [AVX]
// * VMOVDQA xmm, m128 [AVX]
// * VMOVDQA ymm, m256 [AVX]
//
func (self *Program) VMOVDQA(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQA", 2, Operands { v0, v1 })
    // VMOVDQA xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), v[1], 0)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVDQA ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), v[1], 0)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVDQA xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVDQA ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQA")
    }
    return p
}

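// Note (explanatory, not generated output): VMOVDQA requires its 128/256-bit
// memory operands to be naturally aligned and faults on misalignment, whereas
// VMOVDQU further below accepts any alignment; the register-register forms of
// the two instructions behave identically.
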
// VMOVDQA32 performs "Move Aligned Doubleword Values".
//
// Mnemonic : VMOVDQA32
// Supported forms : (9 forms)
//
// * VMOVDQA32 zmm, m512{k}{z} [AVX512F]
// * VMOVDQA32 zmm, zmm{k}{z} [AVX512F]
// * VMOVDQA32 m512, zmm{k}{z} [AVX512F]
// * VMOVDQA32 xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA32 xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA32 ymm, m256{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA32 ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA32 m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA32 m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVDQA32(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQA32", 2, Operands { v0, v1 })
    // VMOVDQA32 zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVDQA32 zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA32 m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDQA32 xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVDQA32 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA32 ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVDQA32 ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA32 m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVDQA32 m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQA32")
    }
    return p
}

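// Note (explanatory, not generated output): VMOVDQA32 and the VMOVDQA64 variant
// that follows share the 0x6f/0x7f opcodes and differ only in EVEX.W (the 0x05
// versus 0x85 prefix selector, i.e. the 0x7d versus 0xfd byte above); the
// declared element width only changes the granularity of {k} masking and {z}
// zeroing -- 32-bit lanes here versus 64-bit lanes in VMOVDQA64.
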
// VMOVDQA64 performs "Move Aligned Quadword Values".
//
// Mnemonic : VMOVDQA64
// Supported forms : (9 forms)
//
// * VMOVDQA64 zmm, m512{k}{z} [AVX512F]
// * VMOVDQA64 zmm, zmm{k}{z} [AVX512F]
// * VMOVDQA64 m512, zmm{k}{z} [AVX512F]
// * VMOVDQA64 xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA64 xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA64 ymm, m256{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA64 ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA64 m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVDQA64 m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVDQA64(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQA64", 2, Operands { v0, v1 })
    // VMOVDQA64 zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVDQA64 zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA64 m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDQA64 xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVDQA64 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA64 ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVDQA64 ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQA64 m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVDQA64 m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQA64")
    }
    return p
}

// VMOVDQU performs "Move Unaligned Double Quadword".
//
// Mnemonic : VMOVDQU
// Supported forms : (6 forms)
//
// * VMOVDQU xmm, xmm [AVX]
// * VMOVDQU m128, xmm [AVX]
// * VMOVDQU ymm, ymm [AVX]
// * VMOVDQU m256, ymm [AVX]
// * VMOVDQU xmm, m128 [AVX]
// * VMOVDQU ymm, m256 [AVX]
//
func (self *Program) VMOVDQU(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQU", 2, Operands { v0, v1 })
    // VMOVDQU xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[0]), v[1], 0)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVDQU ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), v[0], 0)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[0]), v[1], 0)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVDQU xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVDQU ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQU")
    }
    return p
}

// VMOVDQU16 performs "Move Unaligned Word Values".
//
// Mnemonic : VMOVDQU16
// Supported forms : (9 forms)
//
// * VMOVDQU16 zmm, m512{k}{z} [AVX512BW]
// * VMOVDQU16 zmm, zmm{k}{z} [AVX512BW]
// * VMOVDQU16 m512, zmm{k}{z} [AVX512BW]
// * VMOVDQU16 xmm, m128{k}{z} [AVX512BW,AVX512VL]
// * VMOVDQU16 xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VMOVDQU16 ymm, m256{k}{z} [AVX512BW,AVX512VL]
// * VMOVDQU16 ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VMOVDQU16 m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VMOVDQU16 m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VMOVDQU16(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQU16", 2, Operands { v0, v1 })
    // VMOVDQU16 zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVDQU16 zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU16 m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDQU16 xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVDQU16 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU16 ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVDQU16 ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xff)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
|
|
})
|
|
}
|
|
// VMOVDQU16 m128, xmm{k}{z}
|
|
if isM128(v0) && isXMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x6f)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VMOVDQU16 m256, ymm{k}{z}
|
|
if isM256(v0) && isYMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x6f)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VMOVDQU16")
|
|
}
|
|
return p
|
|
}
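
// Note on the element-sized EVEX variants (VMOVDQU8/16/32/64): they all move the
// same bits when unmasked, so the width suffix only matters once an opmask is
// attached, because masking suppresses the transfer per element of that width.
// A hedged sketch, assuming the package offers some decorator for attaching an
// opmask and zeroing flag to a register operand (that API lives outside this file):
//
//     p.VMOVDQU16(Ptr(RSI, 0), zmm1WithK1Z)   // per-word masked load: lanes with
//                                             // k1 = 0 are zeroed ({z}) or merged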

// VMOVDQU32 performs "Move Unaligned Doubleword Values".
//
// Mnemonic        : VMOVDQU32
// Supported forms : (9 forms)
//
//    * VMOVDQU32 zmm, m512{k}{z}    [AVX512F]
//    * VMOVDQU32 zmm, zmm{k}{z}     [AVX512F]
//    * VMOVDQU32 m512, zmm{k}{z}    [AVX512F]
//    * VMOVDQU32 xmm, m128{k}{z}    [AVX512F,AVX512VL]
//    * VMOVDQU32 xmm, xmm{k}{z}     [AVX512F,AVX512VL]
//    * VMOVDQU32 ymm, m256{k}{z}    [AVX512F,AVX512VL]
//    * VMOVDQU32 ymm, ymm{k}{z}     [AVX512F,AVX512VL]
//    * VMOVDQU32 m128, xmm{k}{z}    [AVX512F,AVX512VL]
//    * VMOVDQU32 m256, ymm{k}{z}    [AVX512F,AVX512VL]
//
func (self *Program) VMOVDQU32(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQU32", 2, Operands { v0, v1 })
    // VMOVDQU32 zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVDQU32 zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU32 m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDQU32 xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVDQU32 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU32 ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVDQU32 ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU32 m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVDQU32 m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQU32")
    }
    return p
}

// VMOVDQU64 performs "Move Unaligned Quadword Values".
//
// Mnemonic        : VMOVDQU64
// Supported forms : (9 forms)
//
//    * VMOVDQU64 zmm, m512{k}{z}    [AVX512F]
//    * VMOVDQU64 zmm, zmm{k}{z}     [AVX512F]
//    * VMOVDQU64 m512, zmm{k}{z}    [AVX512F]
//    * VMOVDQU64 xmm, m128{k}{z}    [AVX512F,AVX512VL]
//    * VMOVDQU64 xmm, xmm{k}{z}     [AVX512F,AVX512VL]
//    * VMOVDQU64 ymm, m256{k}{z}    [AVX512F,AVX512VL]
//    * VMOVDQU64 ymm, ymm{k}{z}     [AVX512F,AVX512VL]
//    * VMOVDQU64 m128, xmm{k}{z}    [AVX512F,AVX512VL]
//    * VMOVDQU64 m256, ymm{k}{z}    [AVX512F,AVX512VL]
//
func (self *Program) VMOVDQU64(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQU64", 2, Operands { v0, v1 })
    // VMOVDQU64 zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVDQU64 zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU64 m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDQU64 xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVDQU64 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU64 ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVDQU64 ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfe)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU64 m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVDQU64 m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQU64")
    }
    return p
}

// VMOVDQU8 performs "Move Unaligned Byte Values".
//
// Mnemonic        : VMOVDQU8
// Supported forms : (9 forms)
//
//    * VMOVDQU8 zmm, m512{k}{z}    [AVX512BW]
//    * VMOVDQU8 zmm, zmm{k}{z}     [AVX512BW]
//    * VMOVDQU8 m512, zmm{k}{z}    [AVX512BW]
//    * VMOVDQU8 xmm, m128{k}{z}    [AVX512BW,AVX512VL]
//    * VMOVDQU8 xmm, xmm{k}{z}     [AVX512BW,AVX512VL]
//    * VMOVDQU8 ymm, m256{k}{z}    [AVX512BW,AVX512VL]
//    * VMOVDQU8 ymm, ymm{k}{z}     [AVX512BW,AVX512VL]
//    * VMOVDQU8 m128, xmm{k}{z}    [AVX512BW,AVX512VL]
//    * VMOVDQU8 m256, ymm{k}{z}    [AVX512BW,AVX512VL]
//
func (self *Program) VMOVDQU8(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVDQU8", 2, Operands { v0, v1 })
    // VMOVDQU8 zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVDQU8 zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU8 m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVDQU8 xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVDQU8 xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU8 ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x7f)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVDQU8 ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x6f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVDQU8 m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVDQU8 m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x6f)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVDQU8")
    }
    return p
}

// VMOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low".
//
// Mnemonic        : VMOVHLPS
// Supported forms : (2 forms)
//
//    * VMOVHLPS xmm, xmm, xmm    [AVX]
//    * VMOVHLPS xmm, xmm, xmm    [AVX512F]
//
func (self *Program) VMOVHLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VMOVHLPS", 3, Operands { v0, v1, v2 })
    // VMOVHLPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMOVHLPS xmm, xmm, xmm
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVHLPS")
    }
    return p
}

// VMOVHPD performs "Move High Packed Double-Precision Floating-Point Value".
//
// Mnemonic        : VMOVHPD
// Supported forms : (4 forms)
//
//    * VMOVHPD xmm, m64         [AVX]
//    * VMOVHPD m64, xmm, xmm    [AVX]
//    * VMOVHPD xmm, m64         [AVX512F]
//    * VMOVHPD m64, xmm, xmm    [AVX512F]
//
func (self *Program) VMOVHPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMOVHPD", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VMOVHPD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VMOVHPD takes 2 or 3 operands")
    }
    // VMOVHPD xmm, m64
    if len(vv) == 0 && isXMM(v0) && isM64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x17)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVHPD m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMOVHPD xmm, m64
    if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x17)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VMOVHPD m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVHPD")
    }
    return p
}
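
// The variadic tail on VMOVHPD (and on its siblings below) lets one Go method cover
// both encodings: the two-operand store form (xmm -> m64) and the three-operand
// merge form (m64 + xmm -> xmm). A sketch, with Ptr and the register constants
// assumed from the wider package as before:
//
//     p.VMOVHPD(XMM0, Ptr(RDI, 8))         // store the high double of XMM0 to [RDI+8]
//     p.VMOVHPD(Ptr(RSI, 0), XMM1, XMM2)   // XMM2 = low double of XMM1, high double from [RSI]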

// VMOVHPS performs "Move High Packed Single-Precision Floating-Point Values".
//
// Mnemonic        : VMOVHPS
// Supported forms : (4 forms)
//
//    * VMOVHPS xmm, m64         [AVX]
//    * VMOVHPS m64, xmm, xmm    [AVX]
//    * VMOVHPS xmm, m64         [AVX512F]
//    * VMOVHPS m64, xmm, xmm    [AVX512F]
//
func (self *Program) VMOVHPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMOVHPS", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VMOVHPS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VMOVHPS takes 2 or 3 operands")
    }
    // VMOVHPS xmm, m64
    if len(vv) == 0 && isXMM(v0) && isM64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x17)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVHPS m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMOVHPS xmm, m64
    if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x17)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VMOVHPS m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVHPS")
    }
    return p
}

// VMOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High".
//
// Mnemonic        : VMOVLHPS
// Supported forms : (2 forms)
//
//    * VMOVLHPS xmm, xmm, xmm    [AVX]
//    * VMOVLHPS xmm, xmm, xmm    [AVX512F]
//
func (self *Program) VMOVLHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VMOVLHPS", 3, Operands { v0, v1, v2 })
    // VMOVLHPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMOVLHPS xmm, xmm, xmm
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVLHPS")
    }
    return p
}

// VMOVLPD performs "Move Low Packed Double-Precision Floating-Point Value".
//
// Mnemonic        : VMOVLPD
// Supported forms : (4 forms)
//
//    * VMOVLPD xmm, m64         [AVX]
//    * VMOVLPD m64, xmm, xmm    [AVX]
//    * VMOVLPD xmm, m64         [AVX512F]
//    * VMOVLPD m64, xmm, xmm    [AVX512F]
//
func (self *Program) VMOVLPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMOVLPD", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VMOVLPD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VMOVLPD takes 2 or 3 operands")
    }
    // VMOVLPD xmm, m64
    if len(vv) == 0 && isXMM(v0) && isM64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVLPD m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMOVLPD xmm, m64
    if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VMOVLPD m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVLPD")
    }
    return p
}

// VMOVLPS performs "Move Low Packed Single-Precision Floating-Point Values".
//
// Mnemonic        : VMOVLPS
// Supported forms : (4 forms)
//
//    * VMOVLPS xmm, m64         [AVX]
//    * VMOVLPS m64, xmm, xmm    [AVX]
//    * VMOVLPS xmm, m64         [AVX512F]
//    * VMOVLPS m64, xmm, xmm    [AVX512F]
//
func (self *Program) VMOVLPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMOVLPS", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VMOVLPS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VMOVLPS takes 2 or 3 operands")
    }
    // VMOVLPS xmm, m64
    if len(vv) == 0 && isXMM(v0) && isM64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVLPS m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMOVLPS xmm, m64
    if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VMOVLPS m64, xmm, xmm
    if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVLPS")
    }
    return p
}

// VMOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask".
//
// Mnemonic        : VMOVMSKPD
// Supported forms : (2 forms)
//
//    * VMOVMSKPD xmm, r32    [AVX]
//    * VMOVMSKPD ymm, r32    [AVX]
//
func (self *Program) VMOVMSKPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVMSKPD", 2, Operands { v0, v1 })
    // VMOVMSKPD xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVMSKPD ymm, r32
    if isYMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVMSKPD")
    }
    return p
}

// VMOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask".
//
// Mnemonic        : VMOVMSKPS
// Supported forms : (2 forms)
//
//    * VMOVMSKPS xmm, r32    [AVX]
//    * VMOVMSKPS ymm, r32    [AVX]
//
func (self *Program) VMOVMSKPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVMSKPS", 2, Operands { v0, v1 })
    // VMOVMSKPS xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVMSKPS ymm, r32
    if isYMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVMSKPS")
    }
    return p
}
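
// Illustrative call, same naming assumptions as the earlier sketches: the sign-mask
// extractors write one bit per lane into a general-purpose register, which is handy
// for branching on the outcome of a packed comparison.
//
//     p.VMOVMSKPS(XMM0, EAX)   // EAX = 4-bit mask of the sign bits of XMM0's floats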

// VMOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint".
//
// Mnemonic        : VMOVNTDQ
// Supported forms : (5 forms)
//
//    * VMOVNTDQ xmm, m128    [AVX]
//    * VMOVNTDQ ymm, m256    [AVX]
//    * VMOVNTDQ zmm, m512    [AVX512F]
//    * VMOVNTDQ xmm, m128    [AVX512F,AVX512VL]
//    * VMOVNTDQ ymm, m256    [AVX512F,AVX512VL]
//
func (self *Program) VMOVNTDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVNTDQ", 2, Operands { v0, v1 })
    // VMOVNTDQ xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVNTDQ ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), addr(v[1]), 0)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVNTDQ zmm, m512
    if isZMM(v0) && isM512(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVNTDQ xmm, m128
    if isEVEXXMM(v0) && isM128(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVNTDQ ymm, m256
    if isEVEXYMM(v0) && isM256(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0xe7)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVNTDQ")
    }
    return p
}

// VMOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint".
//
// Mnemonic        : VMOVNTDQA
// Supported forms : (5 forms)
//
//    * VMOVNTDQA m128, xmm    [AVX]
//    * VMOVNTDQA m256, ymm    [AVX2]
//    * VMOVNTDQA m512, zmm    [AVX512F]
//    * VMOVNTDQA m128, xmm    [AVX512F,AVX512VL]
//    * VMOVNTDQA m256, ymm    [AVX512F,AVX512VL]
//
func (self *Program) VMOVNTDQA(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVNTDQA", 2, Operands { v0, v1 })
    // VMOVNTDQA m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVNTDQA m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVNTDQA m512, zmm
    if isM512(v0) && isZMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVNTDQA m128, xmm
    if isM128(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVNTDQA m256, ymm
    if isM256(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2a)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVNTDQA")
    }
    return p
}

// VMOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint".
//
// Mnemonic        : VMOVNTPD
// Supported forms : (5 forms)
//
//    * VMOVNTPD xmm, m128    [AVX]
//    * VMOVNTPD ymm, m256    [AVX]
//    * VMOVNTPD zmm, m512    [AVX512F]
//    * VMOVNTPD xmm, m128    [AVX512F,AVX512VL]
//    * VMOVNTPD ymm, m256    [AVX512F,AVX512VL]
//
func (self *Program) VMOVNTPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVNTPD", 2, Operands { v0, v1 })
    // VMOVNTPD xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVNTPD ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVNTPD zmm, m512
    if isZMM(v0) && isM512(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVNTPD xmm, m128
    if isEVEXXMM(v0) && isM128(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVNTPD ymm, m256
    if isEVEXYMM(v0) && isM256(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVNTPD")
    }
    return p
}

// VMOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint".
//
// Mnemonic        : VMOVNTPS
// Supported forms : (5 forms)
//
//    * VMOVNTPS xmm, m128    [AVX]
//    * VMOVNTPS ymm, m256    [AVX]
//    * VMOVNTPS zmm, m512    [AVX512F]
//    * VMOVNTPS xmm, m128    [AVX512F,AVX512VL]
//    * VMOVNTPS ymm, m256    [AVX512F,AVX512VL]
//
func (self *Program) VMOVNTPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVNTPS", 2, Operands { v0, v1 })
    // VMOVNTPS xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVNTPS ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVNTPS zmm, m512
    if isZMM(v0) && isM512(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVNTPS xmm, m128
    if isEVEXXMM(v0) && isM128(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVNTPS ymm, m256
    if isEVEXYMM(v0) && isM256(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x2b)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVNTPS")
    }
    return p
}
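
// The VMOVNT* encoders above emit non-temporal hints: stores bypass the cache
// hierarchy, which pays off for large write-once buffers, but the memory operand
// must be naturally aligned and an SFENCE is needed before the data is consumed
// elsewhere. A sketch under the same Ptr/register-name assumptions (SFENCE is
// assumed to be among the generated encoders):
//
//     p.VMOVNTDQ(YMM0, Ptr(RDI, 0))   // streaming 32-byte store, [RDI] 32-byte aligned
//     p.SFENCE()                      // order the streaming stores before later use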

// VMOVQ performs "Move Quadword".
//
// Mnemonic        : VMOVQ
// Supported forms : (10 forms)
//
//    * VMOVQ xmm, r64    [AVX]
//    * VMOVQ r64, xmm    [AVX]
//    * VMOVQ xmm, xmm    [AVX]
//    * VMOVQ m64, xmm    [AVX]
//    * VMOVQ xmm, m64    [AVX]
//    * VMOVQ xmm, r64    [AVX512F]
//    * VMOVQ r64, xmm    [AVX512F]
//    * VMOVQ xmm, xmm    [AVX512F]
//    * VMOVQ m64, xmm    [AVX512F]
//    * VMOVQ xmm, m64    [AVX512F]
//
func (self *Program) VMOVQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVQ", 2, Operands { v0, v1 })
    // VMOVQ xmm, r64
    if isXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[0]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf9)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVQ r64, xmm
    if isReg64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), v[1], 0)
            m.emit(0xd6)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x7e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x81, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVQ xmm, m64
    if isXMM(v0) && isM64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0xd6)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b1, 0x81, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVQ xmm, r64
    if isEVEXXMM(v0) && isReg64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit(0x08)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVQ r64, xmm
    if isReg64(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit(0x08)
            m.emit(0x6e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVQ xmm, xmm
    if isEVEXXMM(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x08)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit(0x08)
            m.emit(0xd6)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVQ m64, xmm
    if isM64(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x6e)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x7e)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VMOVQ xmm, m64
    if isEVEXXMM(v0) && isM64(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0x7e)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
            m.emit(0xd6)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVQ")
    }
    return p
}
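
// VMOVQ is the usual bridge between general-purpose and vector registers. Several
// forms above register two equivalent candidate encodings (for example 0x7e vs.
// 0xd6 for the register-register move); keeping both lets the encoder choose a
// candidate at assembly time. Sketch, register constants assumed:
//
//     p.VMOVQ(RAX, XMM0)   // move 64 bits from RAX into the low quadword of XMM0
//     p.VMOVQ(XMM0, RCX)   // and move them back out to RCX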

// VMOVSD performs "Move Scalar Double-Precision Floating-Point Value".
//
// Mnemonic        : VMOVSD
// Supported forms : (6 forms)
//
//    * VMOVSD m64, xmm              [AVX]
//    * VMOVSD xmm, m64              [AVX]
//    * VMOVSD xmm, xmm, xmm         [AVX]
//    * VMOVSD xmm, m64{k}           [AVX512F]
//    * VMOVSD m64, xmm{k}{z}        [AVX512F]
//    * VMOVSD xmm, xmm, xmm{k}{z}   [AVX512F]
//
func (self *Program) VMOVSD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0  : p = self.alloc("VMOVSD", 2, Operands { v0, v1 })
        case 1  : p = self.alloc("VMOVSD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VMOVSD takes 2 or 3 operands")
    }
    // VMOVSD m64, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVSD xmm, m64
    if len(vv) == 0 && isXMM(v0) && isM64(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVSD xmm, xmm, xmm
    if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[0]), v[2], hlcode(v[1]))
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
        })
    }
    // VMOVSD xmm, m64{k}
    if len(vv) == 0 && isEVEXXMM(v0) && isM64k(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VMOVSD m64, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VMOVSD xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVSD")
    }
    return p
}
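
// In the three-operand register form, VMOVSD merges rather than moves: the low
// double comes from the first source and the upper half of the destination is taken
// from the second source. Sketch under the same register-name assumptions:
//
//     p.VMOVSD(XMM1, XMM2, XMM0)   // XMM0[63:0] = XMM1[63:0], XMM0[127:64] = XMM2[127:64]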
|
|
|
|
// VMOVSHDUP performs "Move Packed Single-FP High and Duplicate".
|
|
//
|
|
// Mnemonic : VMOVSHDUP
|
|
// Supported forms : (10 forms)
|
|
//
|
|
// * VMOVSHDUP xmm, xmm [AVX]
|
|
// * VMOVSHDUP m128, xmm [AVX]
|
|
// * VMOVSHDUP ymm, ymm [AVX]
|
|
// * VMOVSHDUP m256, ymm [AVX]
|
|
// * VMOVSHDUP zmm, zmm{k}{z} [AVX512F]
|
|
// * VMOVSHDUP m512, zmm{k}{z} [AVX512F]
|
|
// * VMOVSHDUP xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VMOVSHDUP ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VMOVSHDUP m128, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VMOVSHDUP m256, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VMOVSHDUP(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("VMOVSHDUP", 2, Operands { v0, v1 })
|
|
// VMOVSHDUP xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(2, hcode(v[1]), v[0], 0)
|
|
m.emit(0x16)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VMOVSHDUP m128, xmm
|
|
if isM128(v0) && isXMM(v1) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(2, hcode(v[1]), addr(v[0]), 0)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VMOVSHDUP ymm, ymm
|
|
if isYMM(v0) && isYMM(v1) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(6, hcode(v[1]), v[0], 0)
|
|
m.emit(0x16)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VMOVSHDUP m256, ymm
|
|
if isM256(v0) && isYMM(v1) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(6, hcode(v[1]), addr(v[0]), 0)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VMOVSHDUP zmm, zmm{k}{z}
|
|
if isZMM(v0) && isZMMkz(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0x7e)
|
|
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
|
|
m.emit(0x16)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VMOVSHDUP m512, zmm{k}{z}
|
|
if isM512(v0) && isZMMkz(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VMOVSHDUP xmm, xmm{k}{z}
|
|
if isEVEXXMM(v0) && isXMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0x7e)
|
|
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
|
|
m.emit(0x16)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VMOVSHDUP ymm, ymm{k}{z}
|
|
if isEVEXYMM(v0) && isYMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0x7e)
|
|
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
|
|
m.emit(0x16)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VMOVSHDUP m128, xmm{k}{z}
|
|
if isM128(v0) && isXMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VMOVSHDUP m256, ymm{k}{z}
|
|
if isM256(v0) && isYMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x16)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VMOVSHDUP")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VMOVSLDUP performs "Move Packed Single-FP Low and Duplicate".
//
// Mnemonic : VMOVSLDUP
// Supported forms : (10 forms)
//
// * VMOVSLDUP xmm, xmm [AVX]
// * VMOVSLDUP m128, xmm [AVX]
// * VMOVSLDUP ymm, ymm [AVX]
// * VMOVSLDUP m256, ymm [AVX]
// * VMOVSLDUP zmm, zmm{k}{z} [AVX512F]
// * VMOVSLDUP m512, zmm{k}{z} [AVX512F]
// * VMOVSLDUP xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVSLDUP ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVSLDUP m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVSLDUP m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVSLDUP(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVSLDUP", 2, Operands { v0, v1 })
    // VMOVSLDUP xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), v[0], 0)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVSLDUP m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVSLDUP ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), v[0], 0)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVSLDUP m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVSLDUP zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVSLDUP m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVSLDUP xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVSLDUP ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VMOVSLDUP m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVSLDUP m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVSLDUP")
    }
    return p
}
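
// Usage sketch (illustrative, not produced by the generator): a two-operand
// mnemonic such as VMOVSLDUP dispatches purely on operand kinds, so one method
// covers all ten VEX/EVEX forms listed above. Assuming a *Program value p and
// this package's register constants:
//
//     p.VMOVSLDUP(XMM1, XMM0)    // VEX.128 form: xmm0 gets duplicated low singles of xmm1
//     p.VMOVSLDUP(YMM1, YMM0)    // VEX.256 form, selected by the YMM operands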

// VMOVSS performs "Move Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VMOVSS
// Supported forms : (6 forms)
//
// * VMOVSS m32, xmm [AVX]
// * VMOVSS xmm, m32 [AVX]
// * VMOVSS xmm, xmm, xmm [AVX]
// * VMOVSS xmm, m32{k} [AVX512F]
// * VMOVSS m32, xmm{k}{z} [AVX512F]
// * VMOVSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMOVSS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMOVSS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VMOVSS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VMOVSS takes 2 or 3 operands")
    }
    // VMOVSS m32, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVSS xmm, m32
    if len(vv) == 0 && isXMM(v0) && isM32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVSS xmm, xmm, xmm
    if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[0]), v[2], hlcode(v[1]))
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
        })
    }
    // VMOVSS xmm, m32{k}
    if len(vv) == 0 && isEVEXXMM(v0) && isM32k(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VMOVSS m32, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VMOVSS xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVSS")
    }
    return p
}
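
// Note: VMOVSS is variadic because the mnemonic has both two-operand
// (load/store) and three-operand (register-merge) forms; the switch above
// rejects any other arity before form matching starts. A sketch, assuming a
// *Program value p (operands are source-first, destination-last):
//
//     p.VMOVSS(XMM2, XMM1, XMM0)    // low single from xmm2, upper bits from xmm1, into xmm0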

// VMOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VMOVUPD
// Supported forms : (15 forms)
//
// * VMOVUPD xmm, xmm [AVX]
// * VMOVUPD m128, xmm [AVX]
// * VMOVUPD ymm, ymm [AVX]
// * VMOVUPD m256, ymm [AVX]
// * VMOVUPD xmm, m128 [AVX]
// * VMOVUPD ymm, m256 [AVX]
// * VMOVUPD zmm, m512{k}{z} [AVX512F]
// * VMOVUPD zmm, zmm{k}{z} [AVX512F]
// * VMOVUPD m512, zmm{k}{z} [AVX512F]
// * VMOVUPD xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VMOVUPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVUPD ymm, m256{k}{z} [AVX512F,AVX512VL]
// * VMOVUPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVUPD m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVUPD m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVUPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVUPD", 2, Operands { v0, v1 })
    // VMOVUPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), v[1], 0)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVUPD ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), v[1], 0)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPD m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVUPD xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVUPD ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVUPD zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVUPD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPD m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVUPD xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVUPD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPD ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVUPD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPD m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVUPD m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVUPD")
    }
    return p
}
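
// Encoding-selection note (informational): for the reversible reg-reg form,
// VMOVUPD registers two candidate encodings in one if-block -- opcode 0x10
// (load direction) and 0x11 (store direction) -- leaving the choice between
// the equivalent byte sequences to the instruction's later encoding pass.
// Memory forms are inherently one-directional, so there the operand order
// alone decides between 0x10 and 0x11.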

// VMOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VMOVUPS
// Supported forms : (15 forms)
//
// * VMOVUPS xmm, xmm [AVX]
// * VMOVUPS m128, xmm [AVX]
// * VMOVUPS ymm, ymm [AVX]
// * VMOVUPS m256, ymm [AVX]
// * VMOVUPS xmm, m128 [AVX]
// * VMOVUPS ymm, m256 [AVX]
// * VMOVUPS zmm, m512{k}{z} [AVX512F]
// * VMOVUPS zmm, zmm{k}{z} [AVX512F]
// * VMOVUPS m512, zmm{k}{z} [AVX512F]
// * VMOVUPS xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VMOVUPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVUPS ymm, m256{k}{z} [AVX512F,AVX512VL]
// * VMOVUPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMOVUPS m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VMOVUPS m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMOVUPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VMOVUPS", 2, Operands { v0, v1 })
    // VMOVUPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), v[1], 0)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVUPS ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[0]), v[1], 0)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPS m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VMOVUPS xmm, m128
    if isXMM(v0) && isM128(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVUPS ymm, m256
    if isYMM(v0) && isM256(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[0]), addr(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    // VMOVUPS zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 64)
        })
    }
    // VMOVUPS zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPS m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VMOVUPS xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VMOVUPS xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPS ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VMOVUPS ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VMOVUPS m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VMOVUPS m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMOVUPS")
    }
    return p
}
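
// Encoding note (informational): in the hand-rolled EVEX forms above, the
// constant OR-ed into the fourth prefix byte sets the EVEX.L'L vector-length
// field -- 0x48 selects 512-bit, 0x28 selects 256-bit, 0x08 selects 128-bit --
// while zcode() and kcode() merge in the zeroing bit and the opmask register
// number for the {k}{z} decoration.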

// VMPSADBW performs "Compute Multiple Packed Sums of Absolute Difference".
//
// Mnemonic : VMPSADBW
// Supported forms : (4 forms)
//
// * VMPSADBW imm8, xmm, xmm, xmm [AVX]
// * VMPSADBW imm8, m128, xmm, xmm [AVX]
// * VMPSADBW imm8, ymm, ymm, ymm [AVX2]
// * VMPSADBW imm8, m256, ymm, ymm [AVX2]
//
func (self *Program) VMPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VMPSADBW", 4, Operands { v0, v1, v2, v3 })
    // VMPSADBW imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VMPSADBW imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x42)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VMPSADBW imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x42)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VMPSADBW imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x42)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMPSADBW")
    }
    return p
}
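
// Usage sketch (illustrative): operands follow this package's source-first
// convention, so the imm8 block selector comes first and the destination
// register last -- the reverse of the Intel-manual ordering. Assuming a
// *Program value p:
//
//     p.VMPSADBW(3, XMM2, XMM1, XMM0)    // imm8=3, sources xmm2 and xmm1, result in xmm0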

// VMULPD performs "Multiply Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VMULPD
// Supported forms : (11 forms)
//
// * VMULPD xmm, xmm, xmm [AVX]
// * VMULPD m128, xmm, xmm [AVX]
// * VMULPD ymm, ymm, ymm [AVX]
// * VMULPD m256, ymm, ymm [AVX]
// * VMULPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VMULPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VMULPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VMULPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMULPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMULPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMULPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMULPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMULPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMULPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMULPD takes 3 or 4 operands")
    }
    // VMULPD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMULPD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMULPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VMULPD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMULPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VMULPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VMULPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMULPD")
    }
    return p
}
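
// Note on the {er} form: embedded rounding is what makes VMULPD variadic --
// the rounding-control operand is prepended, pushing the arity to four, and
// must satisfy isER(). The concrete rounding-control operand values are
// defined elsewhere in this package and are not shown here. VMULPS below
// follows the same 3-or-4 operand pattern for single precision.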

// VMULPS performs "Multiply Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VMULPS
// Supported forms : (11 forms)
//
// * VMULPS xmm, xmm, xmm [AVX]
// * VMULPS m128, xmm, xmm [AVX]
// * VMULPS ymm, ymm, ymm [AVX]
// * VMULPS m256, ymm, ymm [AVX]
// * VMULPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VMULPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VMULPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VMULPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMULPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VMULPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VMULPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VMULPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMULPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMULPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMULPS takes 3 or 4 operands")
    }
    // VMULPS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMULPS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMULPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VMULPS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMULPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VMULPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VMULPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMULPS")
    }
    return p
}

// VMULSD performs "Multiply Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VMULSD
// Supported forms : (5 forms)
//
// * VMULSD xmm, xmm, xmm [AVX]
// * VMULSD m64, xmm, xmm [AVX]
// * VMULSD m64, xmm, xmm{k}{z} [AVX512F]
// * VMULSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VMULSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMULSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMULSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMULSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMULSD takes 3 or 4 operands")
    }
    // VMULSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMULSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VMULSD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMULSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMULSD")
    }
    return p
}
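
// Encoding note (informational): the last argument of m.mrsd() is the EVEX
// disp8*N compression factor -- 8 for the m64 scalar form here versus 1 for
// the uncompressed VEX forms -- so a one-byte displacement is scaled by the
// element size whenever an EVEX prefix is in use.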

// VMULSS performs "Multiply Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VMULSS
// Supported forms : (5 forms)
//
// * VMULSS xmm, xmm, xmm [AVX]
// * VMULSS m32, xmm, xmm [AVX]
// * VMULSS m32, xmm, xmm{k}{z} [AVX512F]
// * VMULSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VMULSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VMULSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VMULSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VMULSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VMULSS takes 3 or 4 operands")
    }
    // VMULSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VMULSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VMULSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VMULSS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VMULSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VMULSS")
    }
    return p
}

// VORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values".
//
// Mnemonic : VORPD
// Supported forms : (10 forms)
//
// * VORPD xmm, xmm, xmm [AVX]
// * VORPD m128, xmm, xmm [AVX]
// * VORPD ymm, ymm, ymm [AVX]
// * VORPD m256, ymm, ymm [AVX]
// * VORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VORPD zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VORPD", 3, Operands { v0, v1, v2 })
    // VORPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VORPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VORPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VORPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VORPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VORPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VORPD")
    }
    return p
}
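
// Note on broadcast forms: for the m64bcst variants above, bcode(v[0]) feeds
// the EVEX.b bit, so a single if-block covers both a full-width memory load
// and a broadcast of one float64 -- which one is meant is a property of the
// memory operand itself, not of the mnemonic.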

// VORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values".
//
// Mnemonic : VORPS
// Supported forms : (10 forms)
//
// * VORPS xmm, xmm, xmm [AVX]
// * VORPS m128, xmm, xmm [AVX]
// * VORPS ymm, ymm, ymm [AVX]
// * VORPS m256, ymm, ymm [AVX]
// * VORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VORPS zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VORPS", 3, Operands { v0, v1, v2 })
    // VORPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VORPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VORPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VORPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VORPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VORPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VORPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VORPS")
    }
    return p
}

// VPABSB performs "Packed Absolute Value of Byte Integers".
//
// Mnemonic : VPABSB
// Supported forms : (10 forms)
//
// * VPABSB xmm, xmm [AVX]
// * VPABSB m128, xmm [AVX]
// * VPABSB ymm, ymm [AVX2]
// * VPABSB m256, ymm [AVX2]
// * VPABSB zmm, zmm{k}{z} [AVX512BW]
// * VPABSB m512, zmm{k}{z} [AVX512BW]
// * VPABSB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPABSB ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPABSB m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPABSB m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPABSB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPABSB", 2, Operands { v0, v1 })
    // VPABSB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSB m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPABSB ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSB m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPABSB zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSB m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPABSB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSB ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x1c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSB m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPABSB m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1c)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPABSB")
    }
    return p
}
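
// Encoding note (informational): VPABSB lives in the 0F38 opcode map, which
// the two-byte VEX prefix cannot express; the register forms therefore emit
// the three-byte 0xC4 prefix by hand, and the memory forms delegate to
// m.vex3() for the same reason.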

// VPABSD performs "Packed Absolute Value of Doubleword Integers".
//
// Mnemonic : VPABSD
// Supported forms : (10 forms)
//
// * VPABSD xmm, xmm [AVX]
// * VPABSD m128, xmm [AVX]
// * VPABSD ymm, ymm [AVX2]
// * VPABSD m256, ymm [AVX2]
// * VPABSD m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPABSD zmm, zmm{k}{z} [AVX512F]
// * VPABSD m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPABSD m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPABSD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPABSD ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPABSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPABSD", 2, Operands { v0, v1 })
    // VPABSD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPABSD ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSD m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPABSD m512/m32bcst, zmm{k}{z}
    if isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPABSD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSD m128/m32bcst, xmm{k}{z}
    if isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPABSD m256/m32bcst, ymm{k}{z}
    if isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x1e)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPABSD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPABSD")
    }
    return p
}

// VPABSQ performs "Packed Absolute Value of Quadword Integers".
//
// Mnemonic : VPABSQ
// Supported forms : (6 forms)
//
// * VPABSQ m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPABSQ zmm, zmm{k}{z} [AVX512F]
// * VPABSQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPABSQ m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPABSQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPABSQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPABSQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPABSQ", 2, Operands { v0, v1 })
    // VPABSQ m512/m64bcst, zmm{k}{z}
    if isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x1f)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPABSQ zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSQ m128/m64bcst, xmm{k}{z}
    if isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x1f)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPABSQ m256/m64bcst, ymm{k}{z}
    if isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x1f)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPABSQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSQ ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPABSQ")
    }
    return p
}
|
|
|
|
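// Encoding note for the quadword variant above: relative to VPABSD, VPABSQ
// passes 0x85 instead of 0x05 to m.evex and emits 0xfd instead of 0x7d as
// the second EVEX payload byte in the register forms. The difference is the
// EVEX.W bit, which selects 64-bit elements (and the m64bcst broadcast
// granularity). A one-line sanity check of that relationship:
//
//     const evexW = 0x80 // W bit within the second EVEX payload byte
//     _ = 0x7d | evexW   // == 0xfd, the byte emitted by VPABSQ
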
// VPABSW performs "Packed Absolute Value of Word Integers".
//
// Mnemonic : VPABSW
// Supported forms : (10 forms)
//
// * VPABSW xmm, xmm [AVX]
// * VPABSW m128, xmm [AVX]
// * VPABSW ymm, ymm [AVX2]
// * VPABSW m256, ymm [AVX2]
// * VPABSW zmm, zmm{k}{z} [AVX512BW]
// * VPABSW m512, zmm{k}{z} [AVX512BW]
// * VPABSW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPABSW ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPABSW m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPABSW m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPABSW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPABSW", 2, Operands { v0, v1 })
    // VPABSW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPABSW ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSW m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPABSW zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSW m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPABSW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSW ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x1d)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPABSW m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPABSW m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x1d)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPABSW")
    }
    return p
}

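// The last argument of m.mrsd in the EVEX forms above (64 for zmm, 32 for
// ymm, 16 for xmm memory operands) is the disp8*N compressed-displacement
// scale. A sketch of the rule, independent of this package's helpers
// (emit8/emit32 are placeholders for byte output, not real functions here):
//
//     if disp%n == 0 && disp/n >= -128 && disp/n <= 127 {
//         emit8(byte(disp / n)) // short disp8 form, implicitly scaled by n
//     } else {
//         emit32(disp)          // fall back to a full 32-bit displacement
//     }
//
// The VEX-encoded forms pass 1, i.e. an unscaled displacement.
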
// VPACKSSDW performs "Pack Doublewords into Words with Signed Saturation".
//
// Mnemonic : VPACKSSDW
// Supported forms : (10 forms)
//
// * VPACKSSDW xmm, xmm, xmm [AVX]
// * VPACKSSDW m128, xmm, xmm [AVX]
// * VPACKSSDW ymm, ymm, ymm [AVX2]
// * VPACKSSDW m256, ymm, ymm [AVX2]
// * VPACKSSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW]
// * VPACKSSDW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPACKSSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKSSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKSSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPACKSSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPACKSSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPACKSSDW", 3, Operands { v0, v1, v2 })
    // VPACKSSDW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSDW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKSSDW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSDW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKSSDW m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPACKSSDW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSDW m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPACKSSDW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSDW m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6b)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPACKSSDW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x6b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPACKSSDW")
    }
    return p
}

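// Broadcast note: the m32bcst forms above accept a 4-byte memory source that
// is replicated across all dword lanes; bcode(v[0]) raises the EVEX.b bit
// when the operand was written as a broadcast, roughly:
//
//     m.evex(..., bcode(v[0])) // EVEX.b = 1 selects the {1toN} dword broadcast
//
// The word-sized pack below (VPACKSSWB) takes plain m512/m256/m128 sources
// and passes a literal 0 instead, since embedded broadcast does not exist at
// byte/word granularity.
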
// VPACKSSWB performs "Pack Words into Bytes with Signed Saturation".
//
// Mnemonic : VPACKSSWB
// Supported forms : (10 forms)
//
// * VPACKSSWB xmm, xmm, xmm [AVX]
// * VPACKSSWB m128, xmm, xmm [AVX]
// * VPACKSSWB ymm, ymm, ymm [AVX2]
// * VPACKSSWB m256, ymm, ymm [AVX2]
// * VPACKSSWB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPACKSSWB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPACKSSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKSSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKSSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPACKSSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPACKSSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPACKSSWB", 3, Operands { v0, v1, v2 })
    // VPACKSSWB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSWB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKSSWB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSWB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKSSWB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSWB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPACKSSWB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSWB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPACKSSWB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKSSWB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPACKSSWB")
    }
    return p
}

// VPACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation".
//
// Mnemonic : VPACKUSDW
// Supported forms : (10 forms)
//
// * VPACKUSDW xmm, xmm, xmm [AVX]
// * VPACKUSDW m128, xmm, xmm [AVX]
// * VPACKUSDW ymm, ymm, ymm [AVX2]
// * VPACKUSDW m256, ymm, ymm [AVX2]
// * VPACKUSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW]
// * VPACKUSDW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPACKUSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKUSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKUSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPACKUSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPACKUSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPACKUSDW", 3, Operands { v0, v1, v2 })
    // VPACKUSDW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSDW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKUSDW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSDW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x2b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKUSDW m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2b)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPACKUSDW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSDW m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2b)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPACKUSDW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSDW m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2b)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPACKUSDW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x2b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPACKUSDW")
    }
    return p
}

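// Prefix-selection note: VPACKUSDW is the only pack in this group whose VEX
// forms live in the 0F38 opcode map, so its memory forms use the three-byte
// escape via m.vex3(0xc4, 0b10, ...) and its register forms spell that
// prefix out byte by byte starting with 0xc4, whereas the 0F-map packs
// (VPACKSSDW, VPACKSSWB, VPACKUSWB) fit the compact two-byte m.vex2 prefix.
// The same split shows up in the EVEX calls: map selector 0b10 here versus
// 0b01 for the 0F-map instructions.
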
// VPACKUSWB performs "Pack Words into Bytes with Unsigned Saturation".
//
// Mnemonic : VPACKUSWB
// Supported forms : (10 forms)
//
// * VPACKUSWB xmm, xmm, xmm [AVX]
// * VPACKUSWB m128, xmm, xmm [AVX]
// * VPACKUSWB ymm, ymm, ymm [AVX2]
// * VPACKUSWB m256, ymm, ymm [AVX2]
// * VPACKUSWB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPACKUSWB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPACKUSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKUSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPACKUSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPACKUSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPACKUSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPACKUSWB", 3, Operands { v0, v1, v2 })
    // VPACKUSWB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSWB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKUSWB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSWB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPACKUSWB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSWB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPACKUSWB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSWB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPACKUSWB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x67)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPACKUSWB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x67)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPACKUSWB")
    }
    return p
}

// VPADDB performs "Add Packed Byte Integers".
//
// Mnemonic : VPADDB
// Supported forms : (10 forms)
//
// * VPADDB xmm, xmm, xmm [AVX]
// * VPADDB m128, xmm, xmm [AVX]
// * VPADDB ymm, ymm, ymm [AVX2]
// * VPADDB m256, ymm, ymm [AVX2]
// * VPADDB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPADDB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPADDB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPADDB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDB", 3, Operands { v0, v1, v2 })
    // VPADDB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xfc)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xfc)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xfc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xfc)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDB")
    }
    return p
}

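// Encoding-path note: the plain AVX/AVX2 forms above go through m.vex2, the
// compact two-byte VEX prefix (judging by its use here, the low bits of the
// first argument select the 66 mandatory prefix and the 0x4 bit the 256-bit
// vector length), while every masked form hand-assembles the four-byte EVEX
// prefix starting with the 0x62 escape. The 0x00/0x20/0x40 constants OR-ed
// into the third payload byte are the EVEX vector-length field:
//
//     _ = []byte{0x00, 0x20, 0x40} // L'L = 00/01/10 in bits 5-6: xmm/ymm/zmm
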
// VPADDD performs "Add Packed Doubleword Integers".
//
// Mnemonic : VPADDD
// Supported forms : (10 forms)
//
// * VPADDD xmm, xmm, xmm [AVX]
// * VPADDD m128, xmm, xmm [AVX]
// * VPADDD ymm, ymm, ymm [AVX2]
// * VPADDD m256, ymm, ymm [AVX2]
// * VPADDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPADDD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPADDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPADDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPADDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPADDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDD", 3, Operands { v0, v1, v2 })
    // VPADDD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfe)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfe)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfe)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfe)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfe)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPADDD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xfe)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDD")
    }
    return p
}

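// Masked-destination sketch (hedged: the K1 constant and the .K()/.Z() style
// of writing a {k}{z} destination are assumed from the upstream iasm operand
// API and are not verified against this package):
//
//     p.VPADDD(ZMM1, ZMM2, ZMM3.K(K1))     // merge-masking under k1
//     p.VPADDD(ZMM1, ZMM2, ZMM3.K(K1).Z()) // zero-masking: masked lanes cleared
//
// kcode and zcode in the encoders above extract exactly these two pieces of
// state from the destination operand.
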
// VPADDQ performs "Add Packed Quadword Integers".
//
// Mnemonic : VPADDQ
// Supported forms : (10 forms)
//
// * VPADDQ xmm, xmm, xmm [AVX]
// * VPADDQ m128, xmm, xmm [AVX]
// * VPADDQ ymm, ymm, ymm [AVX2]
// * VPADDQ m256, ymm, ymm [AVX2]
// * VPADDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPADDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPADDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPADDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPADDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPADDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDQ", 3, Operands { v0, v1, v2 })
    // VPADDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd4)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd4)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xd4)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xd4)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xd4)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPADDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDQ")
    }
    return p
}

// VPADDSB performs "Add Packed Signed Byte Integers with Signed Saturation".
//
// Mnemonic : VPADDSB
// Supported forms : (10 forms)
//
// * VPADDSB xmm, xmm, xmm [AVX]
// * VPADDSB m128, xmm, xmm [AVX]
// * VPADDSB ymm, ymm, ymm [AVX2]
// * VPADDSB m256, ymm, ymm [AVX2]
// * VPADDSB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPADDSB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPADDSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPADDSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPADDSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDSB", 3, Operands { v0, v1, v2 })
    // VPADDSB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xec)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDSB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xec)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDSB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xec)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDSB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xec)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDSB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xec)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDSB")
    }
    return p
}

// VPADDSW performs "Add Packed Signed Word Integers with Signed Saturation".
//
// Mnemonic : VPADDSW
// Supported forms : (10 forms)
//
// * VPADDSW xmm, xmm, xmm [AVX]
// * VPADDSW m128, xmm, xmm [AVX]
// * VPADDSW ymm, ymm, ymm [AVX2]
// * VPADDSW m256, ymm, ymm [AVX2]
// * VPADDSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPADDSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPADDSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPADDSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDSW", 3, Operands { v0, v1, v2 })
    // VPADDSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xed)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xed)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xed)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xed)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xed)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDSW")
    }
    return p
}

// VPADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation".
//
// Mnemonic : VPADDUSB
// Supported forms : (10 forms)
//
// * VPADDUSB xmm, xmm, xmm [AVX]
// * VPADDUSB m128, xmm, xmm [AVX]
// * VPADDUSB ymm, ymm, ymm [AVX2]
// * VPADDUSB m256, ymm, ymm [AVX2]
// * VPADDUSB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPADDUSB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPADDUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPADDUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPADDUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDUSB", 3, Operands { v0, v1, v2 })
    // VPADDUSB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDUSB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdc)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDUSB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xdc)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDUSB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xdc)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDUSB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xdc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xdc)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDUSB")
    }
    return p
}

// VPADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation".
//
// Mnemonic : VPADDUSW
// Supported forms : (10 forms)
//
// * VPADDUSW xmm, xmm, xmm [AVX]
// * VPADDUSW m128, xmm, xmm [AVX]
// * VPADDUSW ymm, ymm, ymm [AVX2]
// * VPADDUSW m256, ymm, ymm [AVX2]
// * VPADDUSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPADDUSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPADDUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPADDUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPADDUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDUSW", 3, Operands { v0, v1, v2 })
    // VPADDUSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDUSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDUSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xdd)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDUSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xdd)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDUSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xdd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDUSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xdd)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDUSW")
    }
    return p
}

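// padduswModel is a hand-written scalar model, not generated code: it states
// what one 16-bit lane of VPADDUSW computes per the instruction's documented
// semantics, namely unsigned addition that clamps to 0xFFFF instead of
// wrapping.
func padduswModel(a, b uint16) uint16 {
    if s := uint32(a) + uint32(b); s <= 0xffff {
        return uint16(s)
    }
    return 0xffff
}
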
// VPADDW performs "Add Packed Word Integers".
//
// Mnemonic : VPADDW
// Supported forms : (10 forms)
//
// * VPADDW xmm, xmm, xmm [AVX]
// * VPADDW m128, xmm, xmm [AVX]
// * VPADDW ymm, ymm, ymm [AVX2]
// * VPADDW m256, ymm, ymm [AVX2]
// * VPADDW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPADDW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPADDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPADDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPADDW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPADDW", 3, Operands { v0, v1, v2 })
    // VPADDW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfd)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPADDW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xfd)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPADDW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xfd)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPADDW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xfd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPADDW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xfd)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPADDW")
    }
    return p
}

// VPALIGNR performs "Packed Align Right".
//
// Mnemonic : VPALIGNR
// Supported forms : (10 forms)
//
// * VPALIGNR imm8, xmm, xmm, xmm [AVX]
// * VPALIGNR imm8, m128, xmm, xmm [AVX]
// * VPALIGNR imm8, ymm, ymm, ymm [AVX2]
// * VPALIGNR imm8, m256, ymm, ymm [AVX2]
// * VPALIGNR imm8, zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPALIGNR imm8, m512, zmm, zmm{k}{z} [AVX512BW]
// * VPALIGNR imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPALIGNR imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPALIGNR imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPALIGNR imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPALIGNR(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPALIGNR", 4, Operands { v0, v1, v2, v3 })
    // VPALIGNR imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, m512, zmm, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x0f)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, m128, xmm, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x0f)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPALIGNR imm8, m256, ymm, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x0f)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPALIGNR")
    }
    return p
}

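// exampleVPALIGNR is a hand-written sketch, not generated code. Note the
// operand order used throughout this package: the immediate comes first, so
// the Intel-syntax "VPALIGNR xmm3, xmm2, xmm1, 4" is written with the byte
// shift count as v0 and the destination last. Passing a plain Go integer for
// the imm8 is assumed to satisfy isImm8 via toImmAny.
func exampleVPALIGNR(p *Program) {
    p.VPALIGNR(4, XMM1, XMM2, XMM3) // XMM3 = low 16 bytes of XMM2:XMM1 shifted right by 4 bytes
}
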
// VPAND performs "Packed Bitwise Logical AND".
//
// Mnemonic : VPAND
// Supported forms : (4 forms)
//
// * VPAND xmm, xmm, xmm [AVX]
// * VPAND m128, xmm, xmm [AVX]
// * VPAND ymm, ymm, ymm [AVX2]
// * VPAND m256, ymm, ymm [AVX2]
//
func (self *Program) VPAND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPAND", 3, Operands { v0, v1, v2 })
    // VPAND xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAND m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPAND ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAND m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPAND")
    }
    return p
}

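// Hand-written note, not generated code: VPAND stops at the VEX/AVX2 forms
// above because AVX-512 has no typeless PAND. The EVEX encodings need an
// element width for write masking and embedded broadcasts, which is why the
// generated set continues with VPANDD and VPANDQ below. A sketch of the
// corresponding calls:
func exampleVPAND(p *Program) {
    p.VPAND(YMM0, YMM1, YMM2)  // VEX.256 form, AVX2
    p.VPANDD(ZMM0, ZMM1, ZMM2) // EVEX.512 replacement with 32-bit lanes, AVX-512F
}
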
// VPANDD performs "Bitwise Logical AND of Packed Doubleword Integers".
//
// Mnemonic : VPANDD
// Supported forms : (6 forms)
//
// * VPANDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPANDD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPANDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPANDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPANDD", 3, Operands { v0, v1, v2 })
    // VPANDD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPANDD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPANDD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPANDD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPANDD")
    }
    return p
}

// VPANDN performs "Packed Bitwise Logical AND NOT".
//
// Mnemonic : VPANDN
// Supported forms : (4 forms)
//
// * VPANDN xmm, xmm, xmm [AVX]
// * VPANDN m128, xmm, xmm [AVX]
// * VPANDN ymm, ymm, ymm [AVX2]
// * VPANDN m256, ymm, ymm [AVX2]
//
func (self *Program) VPANDN(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPANDN", 3, Operands { v0, v1, v2 })
    // VPANDN xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDN m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPANDN ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDN m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPANDN")
    }
    return p
}

// VPANDND performs "Bitwise Logical AND NOT of Packed Doubleword Integers".
//
// Mnemonic : VPANDND
// Supported forms : (6 forms)
//
// * VPANDND m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPANDND zmm, zmm, zmm{k}{z} [AVX512F]
// * VPANDND m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDND xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDND m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPANDND ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPANDND", 3, Operands { v0, v1, v2 })
    // VPANDND m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPANDND zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDND m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPANDND xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDND m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPANDND ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPANDND")
    }
    return p
}

// VPANDNQ performs "Bitwise Logical AND NOT of Packed Quadword Integers".
//
// Mnemonic : VPANDNQ
// Supported forms : (6 forms)
//
// * VPANDNQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPANDNQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPANDNQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDNQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDNQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPANDNQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPANDNQ", 3, Operands { v0, v1, v2 })
    // VPANDNQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPANDNQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDNQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPANDNQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDNQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdf)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPANDNQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xdf)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPANDNQ")
    }
    return p
}

// VPANDQ performs "Bitwise Logical AND of Packed Quadword Integers".
//
// Mnemonic : VPANDQ
// Supported forms : (6 forms)
//
// * VPANDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPANDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPANDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPANDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPANDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPANDQ", 3, Operands { v0, v1, v2 })
    // VPANDQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPANDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPANDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPANDQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xdb)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPANDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPANDQ")
    }
    return p
}

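// evexVPANDQBytes is a hand-written walkthrough, not generated code: it
// evaluates the register-register VPANDQ emits above for
// VPANDQ(ZMM2, ZMM1, ZMM0) with low registers and no masking. Every hcode,
// ehcode, ecode, kcode and zcode term is then zero and hlcode(ZMM1) is 1, so
// each emitted expression collapses to the constant noted beside it.
func evexVPANDQBytes() []byte {
    return []byte{
        0x62, // EVEX escape byte
        0xf1, // 0xf1 ^ 0: R/X/B/R' bits all clear for registers 0..7
        0xf5, // 0xfd ^ (hlcode(ZMM1) << 3): W=1 quadword lanes, vvvv encodes ZMM1
        0x48, // 0x08 | 0x40: L'L = 10 selects 512-bit width, no mask, no zeroing
        0xdb, // opcode byte, shared with legacy PAND
        0xc2, // ModRM: 0xc0 | lcode(ZMM0)<<3 | lcode(ZMM2)
    }
}
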
// VPAVGB performs "Average Packed Byte Integers".
//
// Mnemonic : VPAVGB
// Supported forms : (10 forms)
//
// * VPAVGB xmm, xmm, xmm [AVX]
// * VPAVGB m128, xmm, xmm [AVX]
// * VPAVGB ymm, ymm, ymm [AVX2]
// * VPAVGB m256, ymm, ymm [AVX2]
// * VPAVGB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPAVGB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPAVGB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPAVGB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPAVGB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPAVGB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPAVGB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPAVGB", 3, Operands { v0, v1, v2 })
    // VPAVGB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe0)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPAVGB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe0)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPAVGB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe0)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPAVGB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe0)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPAVGB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe0)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPAVGB")
    }
    return p
}

// VPAVGW performs "Average Packed Word Integers".
//
// Mnemonic : VPAVGW
// Supported forms : (10 forms)
//
// * VPAVGW xmm, xmm, xmm [AVX]
// * VPAVGW m128, xmm, xmm [AVX]
// * VPAVGW ymm, ymm, ymm [AVX2]
// * VPAVGW m256, ymm, ymm [AVX2]
// * VPAVGW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPAVGW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPAVGW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPAVGW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPAVGW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPAVGW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPAVGW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPAVGW", 3, Operands { v0, v1, v2 })
    // VPAVGW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe3)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPAVGW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe3)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPAVGW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe3)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPAVGW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPAVGW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPAVGW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe3)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPAVGW")
    }
    return p
}

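// pavgwModel is a hand-written scalar model, not generated code: each 16-bit
// lane of VPAVGW is the unsigned average rounded up, i.e. (a + b + 1) >> 1,
// per the documented semantics of the underlying PAVGW instruction.
func pavgwModel(a, b uint16) uint16 {
    return uint16((uint32(a) + uint32(b) + 1) >> 1)
}
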
// VPBLENDD performs "Blend Packed Doublewords".
//
// Mnemonic : VPBLENDD
// Supported forms : (4 forms)
//
// * VPBLENDD imm8, xmm, xmm, xmm [AVX2]
// * VPBLENDD imm8, m128, xmm, xmm [AVX2]
// * VPBLENDD imm8, ymm, ymm, ymm [AVX2]
// * VPBLENDD imm8, m256, ymm, ymm [AVX2]
//
func (self *Program) VPBLENDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPBLENDD", 4, Operands { v0, v1, v2, v3 })
    // VPBLENDD imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPBLENDD imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x02)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPBLENDD imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPBLENDD imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x02)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDD")
    }
    return p
}

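// exampleVPBLENDD is a hand-written sketch, not generated code: the imm8
// control of VPBLENDD carries one select bit per doubleword lane. In this
// package's operand order the destination comes last; a set bit takes the
// lane from the first vector argument, a clear bit from the second.
func exampleVPBLENDD(p *Program) {
    p.VPBLENDD(0b0101, XMM1, XMM2, XMM3) // XMM3 gets dwords 0,2 from XMM1 and dwords 1,3 from XMM2
}
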
// VPBLENDMB performs "Blend Byte Vectors Using an OpMask Control".
//
// Mnemonic : VPBLENDMB
// Supported forms : (6 forms)
//
// * VPBLENDMB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPBLENDMB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPBLENDMB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBLENDMB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBLENDMB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPBLENDMB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPBLENDMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPBLENDMB", 3, Operands { v0, v1, v2 })
    // VPBLENDMB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPBLENDMB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPBLENDMB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDMB")
    }
    return p
}

// VPBLENDMD performs "Blend Doubleword Vectors Using an OpMask Control".
//
// Mnemonic : VPBLENDMD
// Supported forms : (6 forms)
//
// * VPBLENDMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPBLENDMD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPBLENDMD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBLENDMD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBLENDMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPBLENDMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPBLENDMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPBLENDMD", 3, Operands { v0, v1, v2 })
    // VPBLENDMD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPBLENDMD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPBLENDMD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPBLENDMD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDMD")
    }
    return p
}

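// exampleVPBLENDMD is a hand-written sketch, not generated code. Passed a
// plain register destination, isZMMkz still matches, but kcode and zcode then
// contribute zero bits to the EVEX prefix (no opmask, merge semantics), and
// with no opmask the instruction simply copies its first argument. The
// "blend" behaviour appears under merging-masking: a set mask bit selects the
// first argument's lane, a clear bit the second's. How a masked destination
// operand is spelled is package-specific and not shown here.
func exampleVPBLENDMD(p *Program) {
    p.VPBLENDMD(ZMM1, ZMM2, ZMM3) // with no opmask attached: ZMM3 = ZMM1 for every lane
}
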
// VPBLENDMQ performs "Blend Quadword Vectors Using an OpMask Control".
//
// Mnemonic : VPBLENDMQ
// Supported forms : (6 forms)
//
// * VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPBLENDMQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBLENDMQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPBLENDMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPBLENDMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPBLENDMQ", 3, Operands { v0, v1, v2 })
    // VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPBLENDMQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPBLENDMQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPBLENDMQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDMQ")
    }
    return p
}

// VPBLENDMW performs "Blend Word Vectors Using an OpMask Control".
//
// Mnemonic : VPBLENDMW
// Supported forms : (6 forms)
//
// * VPBLENDMW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPBLENDMW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPBLENDMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBLENDMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBLENDMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPBLENDMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPBLENDMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPBLENDMW", 3, Operands { v0, v1, v2 })
    // VPBLENDMW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPBLENDMW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPBLENDMW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPBLENDMW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDMW")
    }
    return p
}

// VPBLENDVB performs "Variable Blend Packed Bytes".
//
// Mnemonic : VPBLENDVB
// Supported forms : (4 forms)
//
// * VPBLENDVB xmm, xmm, xmm, xmm [AVX]
// * VPBLENDVB xmm, m128, xmm, xmm [AVX]
// * VPBLENDVB ymm, ymm, ymm, ymm [AVX2]
// * VPBLENDVB ymm, m256, ymm, ymm [AVX2]
//
func (self *Program) VPBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPBLENDVB", 4, Operands { v0, v1, v2, v3 })
    // VPBLENDVB xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPBLENDVB xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x4c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPBLENDVB ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPBLENDVB ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x4c)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDVB")
    }
    return p
}
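// Worked example (hand-checked against the register-register path above,
// not generated output): the blend selector travels in the high nibble of
// a trailing immediate byte rather than in a VEX field. With all-low
// registers, VPBLENDVB(xmm3, xmm2, xmm1, xmm0), selector first and
// destination last as in the form comments, assembles to
//
//    c4 e3 71 4c c2 30
//
// i.e. Intel "vpblendvb xmm0, xmm1, xmm2, xmm3": 0x71 carries the
// inverted vvvv for xmm1, ModRM 0xc2 selects reg=xmm0 and rm=xmm2, and
// 0x30 is hlcode(xmm3) << 4.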
// VPBLENDW performs "Blend Packed Words".
//
// Mnemonic : VPBLENDW
// Supported forms : (4 forms)
//
// * VPBLENDW imm8, xmm, xmm, xmm [AVX]
// * VPBLENDW imm8, m128, xmm, xmm [AVX]
// * VPBLENDW imm8, ymm, ymm, ymm [AVX2]
// * VPBLENDW imm8, m256, ymm, ymm [AVX2]
//
func (self *Program) VPBLENDW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPBLENDW", 4, Operands { v0, v1, v2, v3 })
    // VPBLENDW imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x0e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPBLENDW imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPBLENDW imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x0e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPBLENDW imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBLENDW")
    }
    return p
}
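// Usage note (a sketch, not generated output): unlike VPBLENDVB, the
// blend mask here is the leading imm8 operand, one bit per word lane (a
// set bit selects that word from the operand following the immediate, a
// clear bit from the other source). The ymm forms reuse the same eight
// selector bits for both 128-bit halves.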
// VPBROADCASTB performs "Broadcast Byte Integer".
//
// Mnemonic : VPBROADCASTB
// Supported forms : (13 forms)
//
// * VPBROADCASTB xmm, xmm [AVX2]
// * VPBROADCASTB m8, xmm [AVX2]
// * VPBROADCASTB xmm, ymm [AVX2]
// * VPBROADCASTB m8, ymm [AVX2]
// * VPBROADCASTB r32, zmm{k}{z} [AVX512BW]
// * VPBROADCASTB xmm, zmm{k}{z} [AVX512BW]
// * VPBROADCASTB m8, zmm{k}{z} [AVX512BW]
// * VPBROADCASTB r32, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTB r32, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTB xmm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTB m8, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTB m8, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPBROADCASTB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPBROADCASTB", 2, Operands { v0, v1 })
    // VPBROADCASTB xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB m8, xmm
    if isM8(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTB xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB m8, ymm
    if isM8(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTB r32, zmm{k}{z}
    if isReg32(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB m8, zmm{k}{z}
    if isM8(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTB r32, xmm{k}{z}
    if isReg32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB r32, ymm{k}{z}
    if isReg32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x78)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTB m8, xmm{k}{z}
    if isM8(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTB m8, ymm{k}{z}
    if isM8(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x78)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBROADCASTB")
    }
    return p
}
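// Reading aid (not generated output): only the EVEX forms accept a
// general-register source, switching the opcode byte from 0x78 to 0x7a,
// so broadcasting from r32 needs AVX-512BW; the AVX2 forms broadcast from
// xmm or m8 only. The EVEX m8 forms pass a scale of 1 to m.mrsd, the
// byte element width used for compressed-displacement encoding.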
// VPBROADCASTD performs "Broadcast Doubleword Integer".
//
// Mnemonic : VPBROADCASTD
// Supported forms : (13 forms)
//
// * VPBROADCASTD xmm, xmm [AVX2]
// * VPBROADCASTD m32, xmm [AVX2]
// * VPBROADCASTD xmm, ymm [AVX2]
// * VPBROADCASTD m32, ymm [AVX2]
// * VPBROADCASTD r32, zmm{k}{z} [AVX512F]
// * VPBROADCASTD xmm, zmm{k}{z} [AVX512F]
// * VPBROADCASTD m32, zmm{k}{z} [AVX512F]
// * VPBROADCASTD r32, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTD r32, ymm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTD xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTD m32, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTD m32, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPBROADCASTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPBROADCASTD", 2, Operands { v0, v1 })
    // VPBROADCASTD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD m32, ymm
    if isM32(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTD r32, zmm{k}{z}
    if isReg32(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD m32, zmm{k}{z}
    if isM32(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPBROADCASTD r32, xmm{k}{z}
    if isReg32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD r32, ymm{k}{z}
    if isReg32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x58)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTD m32, xmm{k}{z}
    if isM32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPBROADCASTD m32, ymm{k}{z}
    if isM32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x58)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBROADCASTD")
    }
    return p
}
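// Worked example (hand-checked against the AVX2 register path above, not
// generated output): with low registers, VPBROADCASTD(xmm1, xmm0), source
// first and destination last, assembles to c4 e2 79 58 c1, Intel
// "vpbroadcastd xmm0, xmm1". The EVEX m32 forms scale the compressed
// 8-bit displacement by 4, the dword element width.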
// VPBROADCASTMB2Q performs "Broadcast Low Byte of Mask Register to Packed Quadword Values".
//
// Mnemonic : VPBROADCASTMB2Q
// Supported forms : (3 forms)
//
// * VPBROADCASTMB2Q k, xmm [AVX512CD,AVX512VL]
// * VPBROADCASTMB2Q k, ymm [AVX512CD,AVX512VL]
// * VPBROADCASTMB2Q k, zmm [AVX512CD]
//
func (self *Program) VPBROADCASTMB2Q(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPBROADCASTMB2Q", 2, Operands { v0, v1 })
    // VPBROADCASTMB2Q k, xmm
    if isK(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x08)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTMB2Q k, ymm
    if isK(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x28)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTMB2Q k, zmm
    if isK(v0) && isZMM(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x2a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBROADCASTMB2Q")
    }
    return p
}
// VPBROADCASTMW2D performs "Broadcast Low Word of Mask Register to Packed Doubleword Values".
//
// Mnemonic : VPBROADCASTMW2D
// Supported forms : (3 forms)
//
// * VPBROADCASTMW2D k, xmm [AVX512CD,AVX512VL]
// * VPBROADCASTMW2D k, ymm [AVX512CD,AVX512VL]
// * VPBROADCASTMW2D k, zmm [AVX512CD]
//
func (self *Program) VPBROADCASTMW2D(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPBROADCASTMW2D", 2, Operands { v0, v1 })
    // VPBROADCASTMW2D k, xmm
    if isK(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x08)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTMW2D k, ymm
    if isK(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x28)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTMW2D k, zmm
    if isK(v0) && isZMM(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBROADCASTMW2D")
    }
    return p
}
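// Reading aid (not generated output): the two mask-to-vector broadcasts
// above differ only in the EVEX W bit, payload byte 0xfe (W=1) for
// VPBROADCASTMB2Q versus 0x7e (W=0) for VPBROADCASTMW2D, and in the
// opcode, 0x2a versus 0x3a. Neither takes masking or a memory operand, so
// the third payload byte is a bare vector-length selector: 0x08, 0x28 or
// 0x48 for 128, 256 or 512 bits.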
// VPBROADCASTQ performs "Broadcast Quadword Integer".
//
// Mnemonic : VPBROADCASTQ
// Supported forms : (13 forms)
//
// * VPBROADCASTQ xmm, xmm [AVX2]
// * VPBROADCASTQ m64, xmm [AVX2]
// * VPBROADCASTQ xmm, ymm [AVX2]
// * VPBROADCASTQ m64, ymm [AVX2]
// * VPBROADCASTQ r64, zmm{k}{z} [AVX512F]
// * VPBROADCASTQ xmm, zmm{k}{z} [AVX512F]
// * VPBROADCASTQ m64, zmm{k}{z} [AVX512F]
// * VPBROADCASTQ r64, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTQ r64, ymm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTQ m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VPBROADCASTQ m64, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPBROADCASTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPBROADCASTQ", 2, Operands { v0, v1 })
    // VPBROADCASTQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ m64, ymm
    if isM64(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTQ r64, zmm{k}{z}
    if isReg64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ m64, zmm{k}{z}
    if isM64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPBROADCASTQ r64, xmm{k}{z}
    if isReg64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ r64, ymm{k}{z}
    if isReg64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x59)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTQ m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPBROADCASTQ m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x59)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBROADCASTQ")
    }
    return p
}
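// Reading aid (not generated output): VPBROADCASTQ is the W=1 twin of
// VPBROADCASTD: EVEX payload byte 0xfd instead of 0x7d, a 64-bit general
// register source instead of r32, and a compressed-displacement scale of
// 8 for the m64 forms.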
// VPBROADCASTW performs "Broadcast Word Integer".
//
// Mnemonic : VPBROADCASTW
// Supported forms : (13 forms)
//
// * VPBROADCASTW xmm, xmm [AVX2]
// * VPBROADCASTW m16, xmm [AVX2]
// * VPBROADCASTW xmm, ymm [AVX2]
// * VPBROADCASTW m16, ymm [AVX2]
// * VPBROADCASTW r32, zmm{k}{z} [AVX512BW]
// * VPBROADCASTW xmm, zmm{k}{z} [AVX512BW]
// * VPBROADCASTW m16, zmm{k}{z} [AVX512BW]
// * VPBROADCASTW r32, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTW r32, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTW xmm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTW m16, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPBROADCASTW m16, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPBROADCASTW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPBROADCASTW", 2, Operands { v0, v1 })
    // VPBROADCASTW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW m16, xmm
    if isM16(v0) && isXMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTW xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW m16, ymm
    if isM16(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPBROADCASTW r32, zmm{k}{z}
    if isReg32(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW m16, zmm{k}{z}
    if isM16(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 2)
        })
    }
    // VPBROADCASTW r32, xmm{k}{z}
    if isReg32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW r32, ymm{k}{z}
    if isReg32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x7b)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x79)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPBROADCASTW m16, xmm{k}{z}
    if isM16(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 2)
        })
    }
    // VPBROADCASTW m16, ymm{k}{z}
    if isM16(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x79)
            m.mrsd(lcode(v[1]), addr(v[0]), 2)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPBROADCASTW")
    }
    return p
}
// VPCLMULQDQ performs "Carry-Less Quadword Multiplication".
//
// Mnemonic : VPCLMULQDQ
// Supported forms : (2 forms)
//
// * VPCLMULQDQ imm8, xmm, xmm, xmm [AVX,PCLMULQDQ]
// * VPCLMULQDQ imm8, m128, xmm, xmm [AVX,PCLMULQDQ]
//
func (self *Program) VPCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCLMULQDQ", 4, Operands { v0, v1, v2, v3 })
    // VPCLMULQDQ imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX | ISA_PCLMULQDQ)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCLMULQDQ imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX | ISA_PCLMULQDQ)
        p.domain = DomainCrypto
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x44)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCLMULQDQ")
    }
    return p
}
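// Usage note (based on the documented PCLMULQDQ immediate semantics, not
// on anything generated here): imm8 bit 0 selects the low or high
// quadword of one source and bit 4 does the same for the other, so the
// four values 0x00, 0x01, 0x10 and 0x11 yield all four 64x64 -> 128-bit
// carry-less products, the building blocks of GHASH and CRC folding.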
// VPCMOV performs "Packed Conditional Move".
//
// Mnemonic : VPCMOV
// Supported forms : (6 forms)
//
// * VPCMOV xmm, xmm, xmm, xmm [XOP]
// * VPCMOV m128, xmm, xmm, xmm [XOP]
// * VPCMOV xmm, m128, xmm, xmm [XOP]
// * VPCMOV ymm, ymm, ymm, ymm [XOP]
// * VPCMOV m256, ymm, ymm, ymm [XOP]
// * VPCMOV ymm, m256, ymm, ymm [XOP]
//
func (self *Program) VPCMOV(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMOV", 4, Operands { v0, v1, v2, v3 })
    // VPCMOV xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xa2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[2]) << 3))
            m.emit(0xa2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VPCMOV m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0xa2)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VPCMOV xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xa2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPCMOV ymm, ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit(0xa2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfc ^ (hlcode(v[2]) << 3))
            m.emit(0xa2)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VPCMOV m256, ymm, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x84, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0xa2)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VPCMOV ymm, m256, ymm, ymm
    if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x04, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xa2)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMOV")
    }
    return p
}
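// Reading aid (not generated output): VPCMOV is an XOP instruction, hence
// the 0x8f escape byte in place of 0xc4. For the all-register forms the
// generator registers two equivalent encodings: the XOP W bit (0x78
// versus 0xf8 in the third byte) swaps which source sits in ModRM.rm and
// which rides in the trailing immediate nibble, leaving the instruction
// selector free to pick either.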
// VPCMPB performs "Compare Packed Signed Byte Values".
//
// Mnemonic : VPCMPB
// Supported forms : (6 forms)
//
// * VPCMPB imm8, zmm, zmm, k{k} [AVX512BW]
// * VPCMPB imm8, m512, zmm, k{k} [AVX512BW]
// * VPCMPB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL]
// * VPCMPB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL]
// * VPCMPB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL]
// * VPCMPB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPB", 4, Operands { v0, v1, v2, v3 })
    // VPCMPB imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPB imm8, m512, zmm, k{k}
    if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3f)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPB imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPB imm8, m128, xmm, k{k}
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3f)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPB imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPB imm8, m256, ymm, k{k}
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3f)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPB")
    }
    return p
}
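// Usage note (based on the AVX-512 integer compare predicate table, not
// on anything generated here): the leading imm8 of VPCMPB selects the
// predicate: 0 EQ, 1 LT, 2 LE, 3 FALSE, 4 NEQ, 5 NLT, 6 NLE, 7 TRUE.
// The result is a mask register, optionally filtered through a second
// mask via the k{k} destination.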
// VPCMPD performs "Compare Packed Signed Doubleword Values".
//
// Mnemonic : VPCMPD
// Supported forms : (6 forms)
//
// * VPCMPD imm8, m512/m32bcst, zmm, k{k} [AVX512F]
// * VPCMPD imm8, zmm, zmm, k{k} [AVX512F]
// * VPCMPD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPCMPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPCMPD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPCMPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPCMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPD", 4, Operands { v0, v1, v2, v3 })
    // VPCMPD imm8, m512/m32bcst, zmm, k{k}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1f)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPD imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPD imm8, m128/m32bcst, xmm, k{k}
    if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1f)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPD imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPD imm8, m256/m32bcst, ymm, k{k}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1f)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPD imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPD")
    }
    return p
}
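// Reading aid (not generated output): the m32bcst forms are where the
// embedded-broadcast bit comes from; bcode(v[1]) feeds the last argument
// of m.evex, setting EVEX.b so a single dword in memory is compared
// against every lane. Byte-granular VPCMPB has no broadcast form, which
// is why its m.evex calls pass a literal 0 in that position.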
// VPCMPEQB performs "Compare Packed Byte Data for Equality".
//
// Mnemonic : VPCMPEQB
// Supported forms : (10 forms)
//
// * VPCMPEQB xmm, xmm, xmm [AVX]
// * VPCMPEQB m128, xmm, xmm [AVX]
// * VPCMPEQB ymm, ymm, ymm [AVX2]
// * VPCMPEQB m256, ymm, ymm [AVX2]
// * VPCMPEQB zmm, zmm, k{k} [AVX512BW]
// * VPCMPEQB m512, zmm, k{k} [AVX512BW]
// * VPCMPEQB xmm, xmm, k{k} [AVX512BW,AVX512VL]
// * VPCMPEQB m128, xmm, k{k} [AVX512BW,AVX512VL]
// * VPCMPEQB ymm, ymm, k{k} [AVX512BW,AVX512VL]
// * VPCMPEQB m256, ymm, k{k} [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPEQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPEQB", 3, Operands { v0, v1, v2 })
    // VPCMPEQB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x74)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPEQB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x74)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPEQB zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQB m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x74)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPEQB xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQB m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x74)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPEQB ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x74)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQB m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x74)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPEQB")
    }
    return p
}
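// A minimal usage sketch (assuming a *Program value p obtained elsewhere
// and this package's xmm/k register values; only the method itself is
// taken from this file). The AVX form writes a byte mask into a vector,
// while the EVEX form writes one bit per lane into a mask register:
//
//    p.VPCMPEQB(XMM1, XMM0, XMM2)    // AVX: xmm2 = per-byte equality of xmm0 and xmm1
//    p.VPCMPEQB(XMM1, XMM0, K1)      // AVX-512: k1 = one bit per equal byte
//
// As everywhere in this file, sources come first and the destination last.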
// VPCMPEQD performs "Compare Packed Doubleword Data for Equality".
//
// Mnemonic : VPCMPEQD
// Supported forms : (10 forms)
//
// * VPCMPEQD xmm, xmm, xmm [AVX]
// * VPCMPEQD m128, xmm, xmm [AVX]
// * VPCMPEQD ymm, ymm, ymm [AVX2]
// * VPCMPEQD m256, ymm, ymm [AVX2]
// * VPCMPEQD m512/m32bcst, zmm, k{k} [AVX512F]
// * VPCMPEQD zmm, zmm, k{k} [AVX512F]
// * VPCMPEQD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPCMPEQD xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPCMPEQD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPCMPEQD ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPCMPEQD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPEQD", 3, Operands { v0, v1, v2 })
    // VPCMPEQD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPEQD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPEQD m512/m32bcst, zmm, k{k}
    if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPEQD zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQD m128/m32bcst, xmm, k{k}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPEQD xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQD m256/m32bcst, ymm, k{k}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPCMPEQD ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPEQD")
    }
    return p
}
// VPCMPEQQ performs "Compare Packed Quadword Data for Equality".
//
// Mnemonic : VPCMPEQQ
// Supported forms : (10 forms)
//
// * VPCMPEQQ xmm, xmm, xmm [AVX]
// * VPCMPEQQ m128, xmm, xmm [AVX]
// * VPCMPEQQ ymm, ymm, ymm [AVX2]
// * VPCMPEQQ m256, ymm, ymm [AVX2]
// * VPCMPEQQ m512/m64bcst, zmm, k{k} [AVX512F]
// * VPCMPEQQ zmm, zmm, k{k} [AVX512F]
// * VPCMPEQQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPCMPEQQ xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPCMPEQQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPCMPEQQ ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPCMPEQQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPEQQ", 3, Operands { v0, v1, v2 })
    // VPCMPEQQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x29)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPEQQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x29)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPEQQ m512/m64bcst, zmm, k{k}
    if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x29)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPEQQ zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQQ m128/m64bcst, xmm, k{k}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x29)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPEQQ xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPEQQ m256/m64bcst, ymm, k{k}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x29)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPCMPEQQ ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPEQQ")
    }
    return p
}
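// Reading aid (not generated output): unlike the byte/word/dword equality
// compares, whose opcodes 0x74, 0x75 and 0x76 live in the one-byte-escape
// map and can use the compact two-byte VEX prefix (m.vex2), VPCMPEQQ's
// opcode 0x29 sits in the 0F38 map and is W=1 under EVEX (payload byte
// 0xfd), so every form above goes through the three-byte prefix paths.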
// VPCMPEQW performs "Compare Packed Word Data for Equality".
|
|
//
|
|
// Mnemonic : VPCMPEQW
|
|
// Supported forms : (10 forms)
|
|
//
|
|
// * VPCMPEQW xmm, xmm, xmm [AVX]
|
|
// * VPCMPEQW m128, xmm, xmm [AVX]
|
|
// * VPCMPEQW ymm, ymm, ymm [AVX2]
|
|
// * VPCMPEQW m256, ymm, ymm [AVX2]
|
|
// * VPCMPEQW zmm, zmm, k{k} [AVX512BW]
|
|
// * VPCMPEQW m512, zmm, k{k} [AVX512BW]
|
|
// * VPCMPEQW xmm, xmm, k{k} [AVX512BW,AVX512VL]
|
|
// * VPCMPEQW m128, xmm, k{k} [AVX512BW,AVX512VL]
|
|
// * VPCMPEQW ymm, ymm, k{k} [AVX512BW,AVX512VL]
|
|
// * VPCMPEQW m256, ymm, k{k} [AVX512BW,AVX512VL]
|
|
//
|
|
func (self *Program) VPCMPEQW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("VPCMPEQW", 3, Operands { v0, v1, v2 })
|
|
// VPCMPEQW xmm, xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
|
|
m.emit(0x75)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPCMPEQW m128, xmm, xmm
|
|
if isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x75)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VPCMPEQW ymm, ymm, ymm
|
|
if isYMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX2)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
|
|
m.emit(0x75)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPCMPEQW m256, ymm, ymm
|
|
if isM256(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX2)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0x75)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VPCMPEQW zmm, zmm, k{k}
|
|
if isZMM(v0) && isZMM(v1) && isKk(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0x75)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPCMPEQW m512, zmm, k{k}
|
|
if isM512(v0) && isZMM(v1) && isKk(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
|
|
m.emit(0x75)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 64)
|
|
})
|
|
}
|
|
// VPCMPEQW xmm, xmm, k{k}
|
|
if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
|
|
m.emit(0x75)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPCMPEQW m128, xmm, k{k}
|
|
if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
|
|
m.emit(0x75)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 16)
|
|
})
|
|
}
|
|
// VPCMPEQW ymm, ymm, k{k}
|
|
if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0x75)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPCMPEQW m256, ymm, k{k}
|
|
if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
|
|
m.emit(0x75)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VPCMPEQW")
|
|
}
|
|
return p
|
|
}
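// Usage sketch (illustrative): unlike the k{k} forms, the AVX/AVX2 forms
// return an all-ones/all-zeros word per lane in a vector destination, e.g.,
// assuming the XMM register constants exported by this package,
//
//    p.VPCMPEQW(XMM0, XMM1, XMM2)    // VPCMPEQW xmm, xmm, xmm [AVX]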

// VPCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index".
//
// Mnemonic        : VPCMPESTRI
// Supported forms : (2 forms)
//
//    * VPCMPESTRI imm8, xmm, xmm     [AVX]
//    * VPCMPESTRI imm8, m128, xmm    [AVX]
//
func (self *Program) VPCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPESTRI", 3, Operands { v0, v1, v2 })
    // VPCMPESTRI imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPESTRI imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPESTRI")
    }
    return p
}
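// Usage sketch (illustrative): the leading imm8 is the SSE4.2 control byte
// selecting element size, comparison mode, and polarity; a plain Go integer
// is assumed to satisfy isImm8 here, e.g.
//
//    p.VPCMPESTRI(0x0c, XMM1, XMM2)    // VPCMPESTRI imm8, xmm, xmm [AVX]
//
// The index result is produced in ECX by the instruction itself, not by this
// encoder.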

// VPCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask".
//
// Mnemonic        : VPCMPESTRM
// Supported forms : (2 forms)
//
//    * VPCMPESTRM imm8, xmm, xmm     [AVX]
//    * VPCMPESTRM imm8, m128, xmm    [AVX]
//
func (self *Program) VPCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPESTRM", 3, Operands { v0, v1, v2 })
    // VPCMPESTRM imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPESTRM imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPESTRM")
    }
    return p
}

// VPCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than".
//
// Mnemonic        : VPCMPGTB
// Supported forms : (10 forms)
//
//    * VPCMPGTB xmm, xmm, xmm     [AVX]
//    * VPCMPGTB m128, xmm, xmm    [AVX]
//    * VPCMPGTB ymm, ymm, ymm     [AVX2]
//    * VPCMPGTB m256, ymm, ymm    [AVX2]
//    * VPCMPGTB zmm, zmm, k{k}    [AVX512BW]
//    * VPCMPGTB m512, zmm, k{k}   [AVX512BW]
//    * VPCMPGTB xmm, xmm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPGTB m128, xmm, k{k}   [AVX512BW,AVX512VL]
//    * VPCMPGTB ymm, ymm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPGTB m256, ymm, k{k}   [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPGTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPGTB", 3, Operands { v0, v1, v2 })
    // VPCMPGTB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTB zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTB m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPGTB xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTB m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPGTB ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x64)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTB m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x64)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPGTB")
    }
    return p
}
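// Usage sketch (illustrative): operands are passed in reversed Intel order
// (source, source, destination), matching the forms listed in the doc
// comment above, e.g.
//
//    p.VPCMPGTB(YMM0, YMM1, YMM2)    // VPCMPGTB ymm, ymm, ymm [AVX2]
//
// compares the signed bytes of YMM1 against YMM0 and writes the per-lane
// masks to YMM2.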

// VPCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than".
//
// Mnemonic        : VPCMPGTD
// Supported forms : (10 forms)
//
//    * VPCMPGTD xmm, xmm, xmm                [AVX]
//    * VPCMPGTD m128, xmm, xmm               [AVX]
//    * VPCMPGTD ymm, ymm, ymm                [AVX2]
//    * VPCMPGTD m256, ymm, ymm               [AVX2]
//    * VPCMPGTD m512/m32bcst, zmm, k{k}      [AVX512F]
//    * VPCMPGTD zmm, zmm, k{k}               [AVX512F]
//    * VPCMPGTD m128/m32bcst, xmm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPGTD xmm, xmm, k{k}               [AVX512F,AVX512VL]
//    * VPCMPGTD m256/m32bcst, ymm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPGTD ymm, ymm, k{k}               [AVX512F,AVX512VL]
//
func (self *Program) VPCMPGTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPGTD", 3, Operands { v0, v1, v2 })
    // VPCMPGTD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTD m512/m32bcst, zmm, k{k}
    if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPGTD zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTD m128/m32bcst, xmm, k{k}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPGTD xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTD m256/m32bcst, ymm, k{k}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x66)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPCMPGTD ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x66)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPGTD")
    }
    return p
}
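// Usage sketch (illustrative; the memory-operand constructor is an
// assumption, not shown in this file): in the m32bcst forms, bcode(v[0])
// sets EVEX.b so a single doubleword from memory is compared against every
// lane, e.g. with a hypothetical Ptr helper building the m512/m32bcst
// operand:
//
//    p.VPCMPGTD(Ptr(RAX, 0), ZMM3, K2)    // VPCMPGTD m512/m32bcst, zmm, k{k} [AVX512F]
//
// The mrsd disp8 scales (64/32/16) match the full vector width of each form.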

// VPCMPGTQ performs "Compare Packed Data for Greater Than".
//
// Mnemonic        : VPCMPGTQ
// Supported forms : (10 forms)
//
//    * VPCMPGTQ xmm, xmm, xmm                [AVX]
//    * VPCMPGTQ m128, xmm, xmm               [AVX]
//    * VPCMPGTQ ymm, ymm, ymm                [AVX2]
//    * VPCMPGTQ m256, ymm, ymm               [AVX2]
//    * VPCMPGTQ m512/m64bcst, zmm, k{k}      [AVX512F]
//    * VPCMPGTQ zmm, zmm, k{k}               [AVX512F]
//    * VPCMPGTQ m128/m64bcst, xmm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPGTQ xmm, xmm, k{k}               [AVX512F,AVX512VL]
//    * VPCMPGTQ m256/m64bcst, ymm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPGTQ ymm, ymm, k{k}               [AVX512F,AVX512VL]
//
func (self *Program) VPCMPGTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPGTQ", 3, Operands { v0, v1, v2 })
    // VPCMPGTQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x37)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x37)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x37)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x37)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTQ m512/m64bcst, zmm, k{k}
    if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x37)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPGTQ zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x37)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTQ m128/m64bcst, xmm, k{k}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x37)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPGTQ xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x37)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTQ m256/m64bcst, ymm, k{k}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x37)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPCMPGTQ ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x37)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPGTQ")
    }
    return p
}
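// Encoding note (an observation on the code above, not generated text):
// VPCMPGTQ lives on the 0F38 opcode map, so its AVX register forms emit a
// three-byte VEX prefix (0xc4 ...) while the memory forms go through m.vex3;
// the two-byte VEX prefix (m.vex2), as used by VPCMPGTD above, can only
// encode instructions on the 0F map.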

// VPCMPGTW performs "Compare Packed Signed Word Integers for Greater Than".
//
// Mnemonic        : VPCMPGTW
// Supported forms : (10 forms)
//
//    * VPCMPGTW xmm, xmm, xmm     [AVX]
//    * VPCMPGTW m128, xmm, xmm    [AVX]
//    * VPCMPGTW ymm, ymm, ymm     [AVX2]
//    * VPCMPGTW m256, ymm, ymm    [AVX2]
//    * VPCMPGTW zmm, zmm, k{k}    [AVX512BW]
//    * VPCMPGTW m512, zmm, k{k}   [AVX512BW]
//    * VPCMPGTW xmm, xmm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPGTW m128, xmm, k{k}   [AVX512BW,AVX512VL]
//    * VPCMPGTW ymm, ymm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPGTW m256, ymm, k{k}   [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPGTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPGTW", 3, Operands { v0, v1, v2 })
    // VPCMPGTW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPCMPGTW zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTW m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPCMPGTW xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTW m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPCMPGTW ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x65)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPCMPGTW m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x65)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPGTW")
    }
    return p
}

// VPCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index".
//
// Mnemonic        : VPCMPISTRI
// Supported forms : (2 forms)
//
//    * VPCMPISTRI imm8, xmm, xmm     [AVX]
//    * VPCMPISTRI imm8, m128, xmm    [AVX]
//
func (self *Program) VPCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPISTRI", 3, Operands { v0, v1, v2 })
    // VPCMPISTRI imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x63)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPISTRI imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x63)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPISTRI")
    }
    return p
}

// VPCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask".
//
// Mnemonic        : VPCMPISTRM
// Supported forms : (2 forms)
//
//    * VPCMPISTRM imm8, xmm, xmm     [AVX]
//    * VPCMPISTRM imm8, m128, xmm    [AVX]
//
func (self *Program) VPCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPCMPISTRM", 3, Operands { v0, v1, v2 })
    // VPCMPISTRM imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPISTRM imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPISTRM")
    }
    return p
}

// VPCMPQ performs "Compare Packed Signed Quadword Values".
//
// Mnemonic        : VPCMPQ
// Supported forms : (6 forms)
//
//    * VPCMPQ imm8, m512/m64bcst, zmm, k{k}      [AVX512F]
//    * VPCMPQ imm8, zmm, zmm, k{k}               [AVX512F]
//    * VPCMPQ imm8, m128/m64bcst, xmm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPQ imm8, xmm, xmm, k{k}               [AVX512F,AVX512VL]
//    * VPCMPQ imm8, m256/m64bcst, ymm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPQ imm8, ymm, ymm, k{k}               [AVX512F,AVX512VL]
//
func (self *Program) VPCMPQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPQ", 4, Operands { v0, v1, v2, v3 })
    // VPCMPQ imm8, m512/m64bcst, zmm, k{k}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1f)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPQ imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPQ imm8, m128/m64bcst, xmm, k{k}
    if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1f)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPQ imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPQ imm8, m256/m64bcst, ymm, k{k}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1f)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPQ imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x1f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPQ")
    }
    return p
}
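// Usage sketch (illustrative): for the VPCMP{B,W,D,Q} family the leading
// imm8 selects the comparison predicate (0=EQ, 1=LT, 2=LE, 4=NEQ, 5=NLT,
// 6=NLE), e.g.
//
//    p.VPCMPQ(2, ZMM4, ZMM5, K3)    // VPCMPQ imm8, zmm, zmm, k{k} [AVX512F]
//
// computes ZMM5 <= ZMM4 per quadword lane into opmask K3.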

// VPCMPUB performs "Compare Packed Unsigned Byte Values".
//
// Mnemonic        : VPCMPUB
// Supported forms : (6 forms)
//
//    * VPCMPUB imm8, zmm, zmm, k{k}    [AVX512BW]
//    * VPCMPUB imm8, m512, zmm, k{k}   [AVX512BW]
//    * VPCMPUB imm8, xmm, xmm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPUB imm8, m128, xmm, k{k}   [AVX512BW,AVX512VL]
//    * VPCMPUB imm8, ymm, ymm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPUB imm8, m256, ymm, k{k}   [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPUB", 4, Operands { v0, v1, v2, v3 })
    // VPCMPUB imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUB imm8, m512, zmm, k{k}
    if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUB imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUB imm8, m128, xmm, k{k}
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUB imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUB imm8, m256, ymm, k{k}
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPUB")
    }
    return p
}

// VPCMPUD performs "Compare Packed Unsigned Doubleword Values".
//
// Mnemonic        : VPCMPUD
// Supported forms : (6 forms)
//
//    * VPCMPUD imm8, m512/m32bcst, zmm, k{k}      [AVX512F]
//    * VPCMPUD imm8, zmm, zmm, k{k}               [AVX512F]
//    * VPCMPUD imm8, m128/m32bcst, xmm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPUD imm8, xmm, xmm, k{k}               [AVX512F,AVX512VL]
//    * VPCMPUD imm8, m256/m32bcst, ymm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPUD imm8, ymm, ymm, k{k}               [AVX512F,AVX512VL]
//
func (self *Program) VPCMPUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPUD", 4, Operands { v0, v1, v2, v3 })
    // VPCMPUD imm8, m512/m32bcst, zmm, k{k}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1e)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUD imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUD imm8, m128/m32bcst, xmm, k{k}
    if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1e)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUD imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUD imm8, m256/m32bcst, ymm, k{k}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1e)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUD imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPUD")
    }
    return p
}
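// Usage sketch (illustrative): identical shape to VPCMPD but with unsigned
// lane comparisons; predicate 6 (NLE) yields an unsigned greater-than:
//
//    p.VPCMPUD(6, ZMM1, ZMM2, K1)    // VPCMPUD imm8, zmm, zmm, k{k} [AVX512F]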

// VPCMPUQ performs "Compare Packed Unsigned Quadword Values".
//
// Mnemonic        : VPCMPUQ
// Supported forms : (6 forms)
//
//    * VPCMPUQ imm8, m512/m64bcst, zmm, k{k}      [AVX512F]
//    * VPCMPUQ imm8, zmm, zmm, k{k}               [AVX512F]
//    * VPCMPUQ imm8, m128/m64bcst, xmm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPUQ imm8, xmm, xmm, k{k}               [AVX512F,AVX512VL]
//    * VPCMPUQ imm8, m256/m64bcst, ymm, k{k}      [AVX512F,AVX512VL]
//    * VPCMPUQ imm8, ymm, ymm, k{k}               [AVX512F,AVX512VL]
//
func (self *Program) VPCMPUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPUQ", 4, Operands { v0, v1, v2, v3 })
    // VPCMPUQ imm8, m512/m64bcst, zmm, k{k}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1e)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUQ imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUQ imm8, m128/m64bcst, xmm, k{k}
    if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1e)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUQ imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUQ imm8, m256/m64bcst, ymm, k{k}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
            m.emit(0x1e)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUQ imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x1e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPUQ")
    }
    return p
}

// VPCMPUW performs "Compare Packed Unsigned Word Values".
//
// Mnemonic        : VPCMPUW
// Supported forms : (6 forms)
//
//    * VPCMPUW imm8, zmm, zmm, k{k}    [AVX512BW]
//    * VPCMPUW imm8, m512, zmm, k{k}   [AVX512BW]
//    * VPCMPUW imm8, xmm, xmm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPUW imm8, m128, xmm, k{k}   [AVX512BW,AVX512VL]
//    * VPCMPUW imm8, ymm, ymm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPUW imm8, m256, ymm, k{k}   [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPUW", 4, Operands { v0, v1, v2, v3 })
    // VPCMPUW imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUW imm8, m512, zmm, k{k}
    if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUW imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUW imm8, m128, xmm, k{k}
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUW imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPUW imm8, m256, ymm, k{k}
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPUW")
    }
    return p
}

// VPCMPW performs "Compare Packed Signed Word Values".
//
// Mnemonic        : VPCMPW
// Supported forms : (6 forms)
//
//    * VPCMPW imm8, zmm, zmm, k{k}    [AVX512BW]
//    * VPCMPW imm8, m512, zmm, k{k}   [AVX512BW]
//    * VPCMPW imm8, xmm, xmm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPW imm8, m128, xmm, k{k}   [AVX512BW,AVX512VL]
//    * VPCMPW imm8, ymm, ymm, k{k}    [AVX512BW,AVX512VL]
//    * VPCMPW imm8, m256, ymm, k{k}   [AVX512BW,AVX512VL]
//
func (self *Program) VPCMPW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCMPW", 4, Operands { v0, v1, v2, v3 })
    // VPCMPW imm8, zmm, zmm, k{k}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPW imm8, m512, zmm, k{k}
    if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3f)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPW imm8, xmm, xmm, k{k}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPW imm8, m128, xmm, k{k}
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3f)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPW imm8, ymm, ymm, k{k}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCMPW imm8, m256, ymm, k{k}
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
            m.emit(0x3f)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCMPW")
    }
    return p
}

// VPCOMB performs "Compare Packed Signed Byte Integers".
//
// Mnemonic        : VPCOMB
// Supported forms : (2 forms)
//
//    * VPCOMB imm8, xmm, xmm, xmm     [XOP]
//    * VPCOMB imm8, m128, xmm, xmm    [XOP]
//
func (self *Program) VPCOMB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMB", 4, Operands { v0, v1, v2, v3 })
    // VPCOMB imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMB imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xcc)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMB")
    }
    return p
}
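// Usage sketch (illustrative): the XOP forms take a trailing vector
// destination instead of an opmask, and the imm8 predicate uses AMD's XOP
// encoding (0=LT, 1=LE, 2=GT, 3=GE, 4=EQ, 5=NEQ, 6=FALSE, 7=TRUE):
//
//    p.VPCOMB(4, XMM1, XMM2, XMM3)    // VPCOMB imm8, xmm, xmm, xmm [XOP]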

// VPCOMD performs "Compare Packed Signed Doubleword Integers".
//
// Mnemonic        : VPCOMD
// Supported forms : (2 forms)
//
//    * VPCOMD imm8, xmm, xmm, xmm     [XOP]
//    * VPCOMD imm8, m128, xmm, xmm    [XOP]
//
func (self *Program) VPCOMD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMD", 4, Operands { v0, v1, v2, v3 })
    // VPCOMD imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xce)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMD imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xce)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMD")
    }
    return p
}

// VPCOMPRESSD performs "Store Sparse Packed Doubleword Integer Values into Dense Memory/Register".
//
// Mnemonic        : VPCOMPRESSD
// Supported forms : (6 forms)
//
//    * VPCOMPRESSD zmm, zmm{k}{z}     [AVX512F]
//    * VPCOMPRESSD zmm, m512{k}{z}    [AVX512F]
//    * VPCOMPRESSD xmm, xmm{k}{z}     [AVX512F,AVX512VL]
//    * VPCOMPRESSD xmm, m128{k}{z}    [AVX512F,AVX512VL]
//    * VPCOMPRESSD ymm, ymm{k}{z}     [AVX512F,AVX512VL]
//    * VPCOMPRESSD ymm, m256{k}{z}    [AVX512F,AVX512VL]
//
func (self *Program) VPCOMPRESSD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPCOMPRESSD", 2, Operands { v0, v1 })
    // VPCOMPRESSD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPCOMPRESSD zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8b)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPCOMPRESSD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPCOMPRESSD xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8b)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPCOMPRESSD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPCOMPRESSD ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8b)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMPRESSD")
    }
    return p
}
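// Usage sketch (illustrative): VPCOMPRESSD packs the doublewords selected by
// the write mask contiguously into the destination; with a {z} destination
// the remaining lanes are zeroed. Assuming the masked-operand syntax is built
// elsewhere in this package, the plain register form is
//
//    p.VPCOMPRESSD(ZMM1, ZMM2)    // VPCOMPRESSD zmm, zmm{k}{z} [AVX512F]
//
// Note that the mrsd scale of 4 in the memory forms reflects the element
// size, which is what EVEX disp8 compression uses for compress stores.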

// VPCOMPRESSQ performs "Store Sparse Packed Quadword Integer Values into Dense Memory/Register".
//
// Mnemonic : VPCOMPRESSQ
// Supported forms : (6 forms)
//
// * VPCOMPRESSQ zmm, zmm{k}{z} [AVX512F]
// * VPCOMPRESSQ zmm, m512{k}{z} [AVX512F]
// * VPCOMPRESSQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPCOMPRESSQ xmm, m128{k}{z} [AVX512F,AVX512VL]
// * VPCOMPRESSQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPCOMPRESSQ ymm, m256{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPCOMPRESSQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPCOMPRESSQ", 2, Operands { v0, v1 })
    // VPCOMPRESSQ zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPCOMPRESSQ zmm, m512{k}{z}
    if isZMM(v0) && isM512kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8b)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPCOMPRESSQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPCOMPRESSQ xmm, m128{k}{z}
    if isEVEXXMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8b)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPCOMPRESSQ ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x8b)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPCOMPRESSQ ymm, m256{k}{z}
    if isEVEXYMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x8b)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMPRESSQ")
    }
    return p
}
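
// Example: a minimal usage sketch for VPCOMPRESSQ, assuming a *Program value
// p from this package and its ZMM register constants (the register names
// below are illustrative assumptions, not taken from this file):
//
//     p.VPCOMPRESSQ(ZMM0, ZMM1) // zmm, zmm{k}{z} register form, no masking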

// VPCOMQ performs "Compare Packed Signed Quadword Integers".
//
// Mnemonic : VPCOMQ
// Supported forms : (2 forms)
//
// * VPCOMQ imm8, xmm, xmm, xmm [XOP]
// * VPCOMQ imm8, m128, xmm, xmm [XOP]
//
func (self *Program) VPCOMQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMQ", 4, Operands { v0, v1, v2, v3 })
    // VPCOMQ imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xcf)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMQ imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xcf)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMQ")
    }
    return p
}
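
// Example: a minimal usage sketch, assuming XMM register constants from this
// package; the leading immediate selects the XOP comparison predicate (per
// AMD's XOP encoding, 0 = LT through 4 = EQ, 5 = NEQ), and the last operand
// receives the per-lane result mask:
//
//     p.VPCOMQ(4, XMM1, XMM2, XMM3) // XMM3 = lane-wise equality of XMM2, XMM1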

// VPCOMUB performs "Compare Packed Unsigned Byte Integers".
//
// Mnemonic : VPCOMUB
// Supported forms : (2 forms)
//
// * VPCOMUB imm8, xmm, xmm, xmm [XOP]
// * VPCOMUB imm8, m128, xmm, xmm [XOP]
//
func (self *Program) VPCOMUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMUB", 4, Operands { v0, v1, v2, v3 })
    // VPCOMUB imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xec)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMUB imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xec)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMUB")
    }
    return p
}

// VPCOMUD performs "Compare Packed Unsigned Doubleword Integers".
//
// Mnemonic : VPCOMUD
// Supported forms : (2 forms)
//
// * VPCOMUD imm8, xmm, xmm, xmm [XOP]
// * VPCOMUD imm8, m128, xmm, xmm [XOP]
//
func (self *Program) VPCOMUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMUD", 4, Operands { v0, v1, v2, v3 })
    // VPCOMUD imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMUD imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xee)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMUD")
    }
    return p
}

// VPCOMUQ performs "Compare Packed Unsigned Quadword Integers".
//
// Mnemonic : VPCOMUQ
// Supported forms : (2 forms)
//
// * VPCOMUQ imm8, xmm, xmm, xmm [XOP]
// * VPCOMUQ imm8, m128, xmm, xmm [XOP]
//
func (self *Program) VPCOMUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMUQ", 4, Operands { v0, v1, v2, v3 })
    // VPCOMUQ imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMUQ imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xef)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMUQ")
    }
    return p
}

// VPCOMUW performs "Compare Packed Unsigned Word Integers".
//
// Mnemonic : VPCOMUW
// Supported forms : (2 forms)
//
// * VPCOMUW imm8, xmm, xmm, xmm [XOP]
// * VPCOMUW imm8, m128, xmm, xmm [XOP]
//
func (self *Program) VPCOMUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMUW", 4, Operands { v0, v1, v2, v3 })
    // VPCOMUW imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xed)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMUW imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xed)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMUW")
    }
    return p
}

// VPCOMW performs "Compare Packed Signed Word Integers".
//
// Mnemonic : VPCOMW
// Supported forms : (2 forms)
//
// * VPCOMW imm8, xmm, xmm, xmm [XOP]
// * VPCOMW imm8, m128, xmm, xmm [XOP]
//
func (self *Program) VPCOMW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPCOMW", 4, Operands { v0, v1, v2, v3 })
    // VPCOMW imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xcd)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPCOMW imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xcd)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCOMW")
    }
    return p
}

// VPCONFLICTD performs "Detect Conflicts Within a Vector of Packed Doubleword Values into Dense Memory/Register".
//
// Mnemonic : VPCONFLICTD
// Supported forms : (6 forms)
//
// * VPCONFLICTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTD m512/m32bcst, zmm{k}{z} [AVX512CD]
// * VPCONFLICTD xmm, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTD ymm, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTD zmm, zmm{k}{z} [AVX512CD]
//
func (self *Program) VPCONFLICTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPCONFLICTD", 2, Operands { v0, v1 })
    // VPCONFLICTD m128/m32bcst, xmm{k}{z}
    if isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc4)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPCONFLICTD m256/m32bcst, ymm{k}{z}
    if isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc4)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPCONFLICTD m512/m32bcst, zmm{k}{z}
    if isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc4)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPCONFLICTD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPCONFLICTD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPCONFLICTD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCONFLICTD")
    }
    return p
}
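
// Example: a minimal usage sketch, assuming ZMM register constants from this
// package; each doubleword lane of the destination receives a bit mask of the
// earlier source lanes that hold the same value:
//
//     p.VPCONFLICTD(ZMM4, ZMM5) // per-lane duplicate detection, no masking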

// VPCONFLICTQ performs "Detect Conflicts Within a Vector of Packed Quadword Values into Dense Memory/Register".
//
// Mnemonic : VPCONFLICTQ
// Supported forms : (6 forms)
//
// * VPCONFLICTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTQ m512/m64bcst, zmm{k}{z} [AVX512CD]
// * VPCONFLICTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPCONFLICTQ zmm, zmm{k}{z} [AVX512CD]
//
func (self *Program) VPCONFLICTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPCONFLICTQ", 2, Operands { v0, v1 })
    // VPCONFLICTQ m128/m64bcst, xmm{k}{z}
    if isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc4)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPCONFLICTQ m256/m64bcst, ymm{k}{z}
    if isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc4)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPCONFLICTQ m512/m64bcst, zmm{k}{z}
    if isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xc4)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPCONFLICTQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPCONFLICTQ ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPCONFLICTQ zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPCONFLICTQ")
    }
    return p
}

// VPERM2F128 performs "Permute Floating-Point Values".
//
// Mnemonic : VPERM2F128
// Supported forms : (2 forms)
//
// * VPERM2F128 imm8, ymm, ymm, ymm [AVX]
// * VPERM2F128 imm8, m256, ymm, ymm [AVX]
//
func (self *Program) VPERM2F128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPERM2F128", 4, Operands { v0, v1, v2, v3 })
    // VPERM2F128 imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x06)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERM2F128 imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x06)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERM2F128")
    }
    return p
}
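
// Example: a minimal usage sketch, assuming YMM register constants from this
// package; each nibble of the immediate picks one 128-bit half from the two
// source operands (0x20, for instance, concatenates their low halves):
//
//     p.VPERM2F128(0x20, YMM1, YMM2, YMM3)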

// VPERM2I128 performs "Permute 128-Bit Integer Values".
//
// Mnemonic : VPERM2I128
// Supported forms : (2 forms)
//
// * VPERM2I128 imm8, ymm, ymm, ymm [AVX2]
// * VPERM2I128 imm8, m256, ymm, ymm [AVX2]
//
func (self *Program) VPERM2I128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPERM2I128", 4, Operands { v0, v1, v2, v3 })
    // VPERM2I128 imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERM2I128 imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x46)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERM2I128")
    }
    return p
}

// VPERMB performs "Permute Byte Integers".
//
// Mnemonic : VPERMB
// Supported forms : (6 forms)
//
// * VPERMB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMB m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMB m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMB zmm, zmm, zmm{k}{z} [AVX512VBMI]
// * VPERMB m512, zmm, zmm{k}{z} [AVX512VBMI]
//
func (self *Program) VPERMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMB", 3, Operands { v0, v1, v2 })
    // VPERMB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x8d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x8d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x8d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x8d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x8d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x8d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMB")
    }
    return p
}
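
// Example: a minimal usage sketch, assuming ZMM register constants from this
// package; every destination byte is fetched from the table operand at the
// position named by the matching index byte:
//
//     p.VPERMB(ZMM0, ZMM1, ZMM2) // ZMM2[i] = ZMM0[ZMM1[i] & 63], indices in ZMM1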

// VPERMD performs "Permute Doubleword Integers".
//
// Mnemonic : VPERMD
// Supported forms : (6 forms)
//
// * VPERMD ymm, ymm, ymm [AVX2]
// * VPERMD m256, ymm, ymm [AVX2]
// * VPERMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMD", 3, Operands { v0, v1, v2 })
    // VPERMD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x36)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x36)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPERMD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x36)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x36)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x36)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x36)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMD")
    }
    return p
}
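
// Example: a minimal usage sketch of the plain AVX2 form, assuming YMM
// register constants from this package; the middle operand carries the dword
// indices and the first the table:
//
//     p.VPERMD(YMM0, YMM1, YMM2) // YMM2[i] = YMM0[YMM1[i] & 7]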

// VPERMI2B performs "Full Permute of Bytes From Two Tables Overwriting the Index".
//
// Mnemonic : VPERMI2B
// Supported forms : (6 forms)
//
// * VPERMI2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMI2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMI2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMI2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMI2B zmm, zmm, zmm{k}{z} [AVX512VBMI]
// * VPERMI2B m512, zmm, zmm{k}{z} [AVX512VBMI]
//
func (self *Program) VPERMI2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMI2B", 3, Operands { v0, v1, v2 })
    // VPERMI2B xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2B m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x75)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMI2B ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2B m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x75)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMI2B zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2B m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x75)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMI2B")
    }
    return p
}

// VPERMI2D performs "Full Permute of Doublewords From Two Tables Overwriting the Index".
//
// Mnemonic : VPERMI2D
// Supported forms : (6 forms)
//
// * VPERMI2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2D zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMI2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMI2D", 3, Operands { v0, v1, v2 })
    // VPERMI2D m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMI2D zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2D m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMI2D xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2D m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMI2D ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMI2D")
    }
    return p
}
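
// Example: a minimal usage sketch, assuming ZMM register constants from this
// package; the last operand doubles as the index vector and is overwritten,
// and for the 512-bit form bit 4 of each index chooses between the two tables:
//
//     p.VPERMI2D(ZMM0, ZMM1, ZMM2) // indices in ZMM2, tables in ZMM1 and ZMM0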

// VPERMI2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting the Index".
//
// Mnemonic : VPERMI2PD
// Supported forms : (6 forms)
//
// * VPERMI2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMI2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMI2PD", 3, Operands { v0, v1, v2 })
    // VPERMI2PD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x77)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMI2PD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x77)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2PD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x77)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMI2PD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x77)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2PD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x77)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMI2PD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x77)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMI2PD")
    }
    return p
}

// VPERMI2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting the Index".
//
// Mnemonic : VPERMI2PS
// Supported forms : (6 forms)
//
// * VPERMI2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMI2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMI2PS", 3, Operands { v0, v1, v2 })
    // VPERMI2PS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x77)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMI2PS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x77)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2PS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x77)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMI2PS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x77)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2PS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x77)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMI2PS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x77)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMI2PS")
    }
    return p
}

// VPERMI2Q performs "Full Permute of Quadwords From Two Tables Overwriting the Index".
//
// Mnemonic : VPERMI2Q
// Supported forms : (6 forms)
//
// * VPERMI2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2Q zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMI2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMI2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMI2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMI2Q", 3, Operands { v0, v1, v2 })
    // VPERMI2Q m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMI2Q zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2Q m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMI2Q xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2Q m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x76)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMI2Q ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x76)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMI2Q")
    }
    return p
}

// VPERMI2W performs "Full Permute of Words From Two Tables Overwriting the Index".
//
// Mnemonic : VPERMI2W
// Supported forms : (6 forms)
//
// * VPERMI2W zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPERMI2W m512, zmm, zmm{k}{z} [AVX512BW]
// * VPERMI2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPERMI2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPERMI2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPERMI2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPERMI2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMI2W", 3, Operands { v0, v1, v2 })
    // VPERMI2W zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2W m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x75)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMI2W xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2W m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x75)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMI2W ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x75)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMI2W m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x75)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMI2W")
    }
    return p
}

// VPERMIL2PD performs "Permute Two-Source Double-Precision Floating-Point Vectors".
//
// Mnemonic : VPERMIL2PD
// Supported forms : (6 forms)
//
// * VPERMIL2PD imm4, xmm, xmm, xmm, xmm [XOP]
// * VPERMIL2PD imm4, m128, xmm, xmm, xmm [XOP]
// * VPERMIL2PD imm4, xmm, m128, xmm, xmm [XOP]
// * VPERMIL2PD imm4, ymm, ymm, ymm, ymm [XOP]
// * VPERMIL2PD imm4, m256, ymm, ymm, ymm [XOP]
// * VPERMIL2PD imm4, ymm, m256, ymm, ymm [XOP]
//
func (self *Program) VPERMIL2PD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction {
    p := self.alloc("VPERMIL2PD", 5, Operands { v0, v1, v2, v3, v4 })
    // VPERMIL2PD imm4, xmm, xmm, xmm, xmm
    if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79 ^ (hlcode(v[3]) << 3))
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf9 ^ (hlcode(v[3]) << 3))
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PD imm4, m128, xmm, xmm, xmm
    if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3]))
            m.emit(0x49)
            m.mrsd(lcode(v[4]), addr(v[1]), 1)
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PD imm4, xmm, m128, xmm, xmm
    if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3]))
            m.emit(0x49)
            m.mrsd(lcode(v[4]), addr(v[2]), 1)
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PD imm4, ymm, ymm, ymm, ymm
    if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit(0x49)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PD imm4, m256, ymm, ymm, ymm
    if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3]))
            m.emit(0x49)
            m.mrsd(lcode(v[4]), addr(v[1]), 1)
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PD imm4, ymm, m256, ymm, ymm
    if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), hlcode(v[3]))
            m.emit(0x49)
            m.mrsd(lcode(v[4]), addr(v[2]), 1)
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMIL2PD")
    }
    return p
}
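
// Example: a minimal usage sketch, assuming XMM register constants from this
// package; the leading 4-bit immediate is the XOP m2z field, which controls
// optional zeroing of lanes selected by the per-lane control vector:
//
//     p.VPERMIL2PD(0, XMM1, XMM2, XMM3, XMM4) // m2z = 0: no zeroing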

// VPERMIL2PS performs "Permute Two-Source Single-Precision Floating-Point Vectors".
//
// Mnemonic : VPERMIL2PS
// Supported forms : (6 forms)
//
// * VPERMIL2PS imm4, xmm, xmm, xmm, xmm [XOP]
// * VPERMIL2PS imm4, m128, xmm, xmm, xmm [XOP]
// * VPERMIL2PS imm4, xmm, m128, xmm, xmm [XOP]
// * VPERMIL2PS imm4, ymm, ymm, ymm, ymm [XOP]
// * VPERMIL2PS imm4, m256, ymm, ymm, ymm [XOP]
// * VPERMIL2PS imm4, ymm, m256, ymm, ymm [XOP]
//
func (self *Program) VPERMIL2PS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction {
    p := self.alloc("VPERMIL2PS", 5, Operands { v0, v1, v2, v3, v4 })
    // VPERMIL2PS imm4, xmm, xmm, xmm, xmm
    if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79 ^ (hlcode(v[3]) << 3))
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xf9 ^ (hlcode(v[3]) << 3))
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PS imm4, m128, xmm, xmm, xmm
    if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3]))
            m.emit(0x48)
            m.mrsd(lcode(v[4]), addr(v[1]), 1)
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PS imm4, xmm, m128, xmm, xmm
    if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3]))
            m.emit(0x48)
            m.mrsd(lcode(v[4]), addr(v[2]), 1)
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PS imm4, ymm, ymm, ymm, ymm
    if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit(0x48)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PS imm4, m256, ymm, ymm, ymm
    if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3]))
            m.emit(0x48)
            m.mrsd(lcode(v[4]), addr(v[1]), 1)
            m.emit((hlcode(v[2]) << 4) | imml(v[0]))
        })
    }
    // VPERMIL2PS imm4, ymm, m256, ymm, ymm
    if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), hlcode(v[3]))
            m.emit(0x48)
            m.mrsd(lcode(v[4]), addr(v[2]), 1)
            m.emit((hlcode(v[1]) << 4) | imml(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMIL2PS")
    }
    return p
}

// VPERMILPD performs "Permute Double-Precision Floating-Point Values".
//
// Mnemonic : VPERMILPD
// Supported forms : (20 forms)
//
// * VPERMILPD imm8, xmm, xmm [AVX]
// * VPERMILPD xmm, xmm, xmm [AVX]
// * VPERMILPD m128, xmm, xmm [AVX]
// * VPERMILPD imm8, m128, xmm [AVX]
// * VPERMILPD imm8, ymm, ymm [AVX]
// * VPERMILPD ymm, ymm, ymm [AVX]
// * VPERMILPD m256, ymm, ymm [AVX]
// * VPERMILPD imm8, m256, ymm [AVX]
// * VPERMILPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPERMILPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMILPD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPERMILPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMILPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMILPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMILPD", 3, Operands { v0, v1, v2 })
    // VPERMILPD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPERMILPD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d)
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPERMILPD imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x0d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMILPD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPD imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x0d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMILPD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x0d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x0d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
|
|
})
|
|
}
|
|
// VPERMILPD imm8, ymm, ymm{k}{z}
|
|
if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd)
|
|
m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
|
|
m.emit(0x05)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPERMILPD ymm, ymm, ymm{k}{z}
|
|
if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
|
|
m.emit(0x0d)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VPERMILPD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
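// Usage sketch (hand-written illustration, not generator output). The
// VPERMILPD dispatcher above selects an encoding purely from the operand
// types, so one method covers every form in its table. Operand order follows
// the reversed, AT&T-style convention used throughout this file (sources
// first, destination last); XMM0/XMM1/YMM0/... are assumed to be this
// package's register constants, and a plain Go integer is assumed to
// satisfy isImm8:
//
//     p.VPERMILPD(0b01, XMM1, XMM0)    // imm8, xmm, xmm: XMM0 = XMM1 with its two lanes swapped
//     p.VPERMILPD(YMM2, YMM1, YMM0)    // ymm, ymm, ymm: YMM0 = YMM1 permuted by the control vector in YMM2
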
// VPERMILPS performs "Permute Single-Precision Floating-Point Values".
//
// Mnemonic : VPERMILPS
// Supported forms : (20 forms)
//
// * VPERMILPS imm8, xmm, xmm [AVX]
// * VPERMILPS xmm, xmm, xmm [AVX]
// * VPERMILPS m128, xmm, xmm [AVX]
// * VPERMILPS imm8, m128, xmm [AVX]
// * VPERMILPS imm8, ymm, ymm [AVX]
// * VPERMILPS ymm, ymm, ymm [AVX]
// * VPERMILPS m256, ymm, ymm [AVX]
// * VPERMILPS imm8, m256, ymm [AVX]
// * VPERMILPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPERMILPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMILPS imm8, zmm, zmm{k}{z} [AVX512F]
// * VPERMILPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMILPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMILPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMILPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMILPS", 3, Operands { v0, v1, v2 })
    // VPERMILPS imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPERMILPS imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPERMILPS imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x0c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMILPS imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPS imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x0c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMILPS imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMILPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x0c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMILPS imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMILPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x0c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMILPS")
    }
    return p
}

// VPERMPD performs "Permute Double-Precision Floating-Point Elements".
//
// Mnemonic : VPERMPD
// Supported forms : (10 forms)
//
// * VPERMPD imm8, ymm, ymm [AVX2]
// * VPERMPD imm8, m256, ymm [AVX2]
// * VPERMPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPERMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMPD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPERMPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMPD", 3, Operands { v0, v1, v2 })
    // VPERMPD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xfd)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMPD imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x01)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMPD imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x01)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMPD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMPD imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x01)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMPD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMPD")
    }
    return p
}

// VPERMPS performs "Permute Single-Precision Floating-Point Elements".
//
// Mnemonic : VPERMPS
// Supported forms : (6 forms)
//
// * VPERMPS ymm, ymm, ymm [AVX2]
// * VPERMPS m256, ymm, ymm [AVX2]
// * VPERMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMPS", 3, Operands { v0, v1, v2 })
    // VPERMPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPERMPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x16)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMPS")
    }
    return p
}

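// Usage sketch (hand-written illustration, not generator output). VPERMPS is
// the full-width lane-crossing permute: with this file's reversed operand
// order, the middle operand supplies the per-element indices and the first
// operand is the table being indexed. Assuming YMM1 already holds an index
// vector and YMM0/YMM1/YMM2 are this package's register constants:
//
//     p.VPERMPS(YMM2, YMM1, YMM0)    // ymm, ymm, ymm form [AVX2]: YMM0[i] = YMM2[YMM1[i]]
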
// VPERMQ performs "Permute Quadword Integers".
//
// Mnemonic : VPERMQ
// Supported forms : (10 forms)
//
// * VPERMQ imm8, ymm, ymm [AVX2]
// * VPERMQ imm8, m256, ymm [AVX2]
// * VPERMQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPERMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMQ imm8, zmm, zmm{k}{z} [AVX512F]
// * VPERMQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMQ", 3, Operands { v0, v1, v2 })
    // VPERMQ imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0xfd)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMQ imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMQ imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x36)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMQ imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x36)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMQ imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x36)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMQ imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPERMQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x36)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMQ")
    }
    return p
}

// VPERMT2B performs "Full Permute of Bytes From Two Tables Overwriting a Table".
//
// Mnemonic : VPERMT2B
// Supported forms : (6 forms)
//
// * VPERMT2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMT2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMT2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMT2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPERMT2B zmm, zmm, zmm{k}{z} [AVX512VBMI]
// * VPERMT2B m512, zmm, zmm{k}{z} [AVX512VBMI]
//
func (self *Program) VPERMT2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMT2B", 3, Operands { v0, v1, v2 })
    // VPERMT2B xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2B m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMT2B ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2B m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMT2B zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2B m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMT2B")
    }
    return p
}

// VPERMT2D performs "Full Permute of Doublewords From Two Tables Overwriting a Table".
//
// Mnemonic : VPERMT2D
// Supported forms : (6 forms)
//
// * VPERMT2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2D zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMT2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMT2D", 3, Operands { v0, v1, v2 })
    // VPERMT2D m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMT2D zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2D m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMT2D xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2D m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMT2D ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMT2D")
    }
    return p
}

// VPERMT2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting a Table".
//
// Mnemonic : VPERMT2PD
// Supported forms : (6 forms)
//
// * VPERMT2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2PD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMT2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMT2PD", 3, Operands { v0, v1, v2 })
    // VPERMT2PD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7f)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMT2PD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2PD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7f)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMT2PD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2PD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7f)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMT2PD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMT2PD")
    }
    return p
}

// VPERMT2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting a Table".
//
// Mnemonic : VPERMT2PS
// Supported forms : (6 forms)
//
// * VPERMT2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2PS zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMT2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMT2PS", 3, Operands { v0, v1, v2 })
    // VPERMT2PS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7f)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMT2PS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2PS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7f)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMT2PS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2PS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7f)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMT2PS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x7f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMT2PS")
    }
    return p
}

// VPERMT2Q performs "Full Permute of Quadwords From Two Tables Overwriting a Table".
//
// Mnemonic : VPERMT2Q
// Supported forms : (6 forms)
//
// * VPERMT2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2Q zmm, zmm, zmm{k}{z} [AVX512F]
// * VPERMT2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPERMT2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPERMT2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMT2Q", 3, Operands { v0, v1, v2 })
    // VPERMT2Q m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMT2Q zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2Q m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMT2Q xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2Q m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x7e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPERMT2Q ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x7e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMT2Q")
    }
    return p
}

// VPERMT2W performs "Full Permute of Words From Two Tables Overwriting a Table".
//
// Mnemonic : VPERMT2W
// Supported forms : (6 forms)
//
// * VPERMT2W zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPERMT2W m512, zmm, zmm{k}{z} [AVX512BW]
// * VPERMT2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPERMT2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPERMT2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPERMT2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPERMT2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMT2W", 3, Operands { v0, v1, v2 })
    // VPERMT2W zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2W m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMT2W xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2W m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMT2W ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x7d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMT2W m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x7d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMT2W")
    }
    return p
}

// VPERMW performs "Permute Word Integers".
//
// Mnemonic : VPERMW
// Supported forms : (6 forms)
//
// * VPERMW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPERMW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPERMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPERMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPERMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPERMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPERMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPERMW", 3, Operands { v0, v1, v2 })
    // VPERMW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x8d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x8d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPERMW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x8d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x8d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPERMW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x8d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPERMW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x8d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPERMW")
    }
    return p
}

// VPEXPANDD performs "Load Sparse Packed Doubleword Integer Values from Dense Memory/Register".
//
// Mnemonic : VPEXPANDD
// Supported forms : (6 forms)
//
// * VPEXPANDD zmm, zmm{k}{z} [AVX512F]
// * VPEXPANDD m512, zmm{k}{z} [AVX512F]
// * VPEXPANDD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPEXPANDD ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPEXPANDD m128, xmm{k}{z} [AVX512F,AVX512VL]
// * VPEXPANDD m256, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPEXPANDD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPEXPANDD", 2, Operands { v0, v1 })
    // VPEXPANDD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x89)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPEXPANDD m512, zmm{k}{z}
    if isM512(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x89)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPEXPANDD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x89)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPEXPANDD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x89)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPEXPANDD m128, xmm{k}{z}
    if isM128(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x89)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPEXPANDD m256, ymm{k}{z}
    if isM256(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x89)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPEXPANDD")
    }
    return p
}

// VPEXPANDQ performs "Load Sparse Packed Quadword Integer Values from Dense Memory/Register".
|
|
//
|
|
// Mnemonic : VPEXPANDQ
|
|
// Supported forms : (6 forms)
|
|
//
|
|
// * VPEXPANDQ zmm, zmm{k}{z} [AVX512F]
|
|
// * VPEXPANDQ m512, zmm{k}{z} [AVX512F]
|
|
// * VPEXPANDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VPEXPANDQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VPEXPANDQ m128, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VPEXPANDQ m256, ymm{k}{z} [AVX512F,AVX512VL]
|
|
//
|
|
func (self *Program) VPEXPANDQ(v0 interface{}, v1 interface{}) *Instruction {
|
|
p := self.alloc("VPEXPANDQ", 2, Operands { v0, v1 })
|
|
// VPEXPANDQ zmm, zmm{k}{z}
|
|
if isZMM(v0) && isZMMkz(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0xfd)
|
|
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
|
|
m.emit(0x89)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPEXPANDQ m512, zmm{k}{z}
|
|
if isM512(v0) && isZMMkz(v1) {
|
|
self.require(ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x89)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 8)
|
|
})
|
|
}
|
|
// VPEXPANDQ xmm, xmm{k}{z}
|
|
if isEVEXXMM(v0) && isXMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0xfd)
|
|
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
|
|
m.emit(0x89)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPEXPANDQ ymm, ymm{k}{z}
|
|
if isEVEXYMM(v0) && isYMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
|
|
m.emit(0xfd)
|
|
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
|
|
m.emit(0x89)
|
|
m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPEXPANDQ m128, xmm{k}{z}
|
|
if isM128(v0) && isXMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x89)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 8)
|
|
})
|
|
}
|
|
// VPEXPANDQ m256, ymm{k}{z}
|
|
if isM256(v0) && isYMMkz(v1) {
|
|
self.require(ISA_AVX512VL | ISA_AVX512F)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
|
|
m.emit(0x89)
|
|
m.mrsd(lcode(v[1]), addr(v[0]), 8)
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VPEXPANDQ")
|
|
}
|
|
return p
|
|
}
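
// Hand-written usage sketch (not emitted by mkasm_amd64.py): the expand
// family copies the low elements of the source into destination lanes
// selected by an optional opmask. Operands follow this package's
// source-first order, so a plain register-to-register expand looks like the
// calls below. The register constants are assumed to be this package's
// exported ZMM/XMM definitions, and p is assumed to be a *Program obtained
// from the package's program constructor outside this file.
//
//    p.VPEXPANDQ(ZMM1, ZMM0) // zmm form, requires AVX512F
//    p.VPEXPANDQ(XMM3, XMM2) // xmm form, requires AVX512F and AVX512VL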

// VPEXTRB performs "Extract Byte".
//
// Mnemonic : VPEXTRB
// Supported forms : (4 forms)
//
// * VPEXTRB imm8, xmm, r32 [AVX]
// * VPEXTRB imm8, xmm, m8 [AVX]
// * VPEXTRB imm8, xmm, r32 [AVX512BW]
// * VPEXTRB imm8, xmm, m8 [AVX512BW]
//
func (self *Program) VPEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPEXTRB", 3, Operands { v0, v1, v2 })
    // VPEXTRB imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRB imm8, xmm, m8
    if isImm8(v0) && isXMM(v1) && isM8(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x14)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRB imm8, xmm, r32
    if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRB imm8, xmm, m8
    if isImm8(v0) && isEVEXXMM(v1) && isM8(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
            m.emit(0x14)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPEXTRB")
    }
    return p
}

// VPEXTRD performs "Extract Doubleword".
//
// Mnemonic : VPEXTRD
// Supported forms : (4 forms)
//
// * VPEXTRD imm8, xmm, r32 [AVX]
// * VPEXTRD imm8, xmm, m32 [AVX]
// * VPEXTRD imm8, xmm, r32 [AVX512DQ]
// * VPEXTRD imm8, xmm, m32 [AVX512DQ]
//
func (self *Program) VPEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPEXTRD", 3, Operands { v0, v1, v2 })
    // VPEXTRD imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRD imm8, xmm, m32
    if isImm8(v0) && isXMM(v1) && isM32(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRD imm8, xmm, r32
    if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRD imm8, xmm, m32
    if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[2]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPEXTRD")
    }
    return p
}
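
// exampleVPEXTRD is a hand-written sketch, not generated output: it shows the
// imm8-first, destination-last operand order used throughout this file. The
// call extracts dword lane 1 of XMM2 into EAX; XMM2 and EAX are assumed to be
// this package's exported register constants, and the imm8 is a plain Go int.
func exampleVPEXTRD(p *Program) {
    p.VPEXTRD(1, XMM2, EAX) // imm8 selects the lane; operands read source-to-destination
}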

// VPEXTRQ performs "Extract Quadword".
//
// Mnemonic : VPEXTRQ
// Supported forms : (4 forms)
//
// * VPEXTRQ imm8, xmm, r64 [AVX]
// * VPEXTRQ imm8, xmm, m64 [AVX]
// * VPEXTRQ imm8, xmm, r64 [AVX512DQ]
// * VPEXTRQ imm8, xmm, m64 [AVX512DQ]
//
func (self *Program) VPEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPEXTRQ", 3, Operands { v0, v1, v2 })
    // VPEXTRQ imm8, xmm, r64
    if isImm8(v0) && isXMM(v1) && isReg64(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0xf9)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRQ imm8, xmm, m64
    if isImm8(v0) && isXMM(v1) && isM64(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x81, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRQ imm8, xmm, r64
    if isImm8(v0) && isEVEXXMM(v1) && isReg64(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit(0x08)
            m.emit(0x16)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRQ imm8, xmm, m64
    if isImm8(v0) && isEVEXXMM(v1) && isM64(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
            m.emit(0x16)
            m.mrsd(lcode(v[1]), addr(v[2]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPEXTRQ")
    }
    return p
}

// VPEXTRW performs "Extract Word".
//
// Mnemonic : VPEXTRW
// Supported forms : (4 forms)
//
// * VPEXTRW imm8, xmm, r32 [AVX]
// * VPEXTRW imm8, xmm, m16 [AVX]
// * VPEXTRW imm8, xmm, r32 [AVX512BW]
// * VPEXTRW imm8, xmm, m16 [AVX512BW]
//
func (self *Program) VPEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPEXTRW", 3, Operands { v0, v1, v2 })
    // VPEXTRW imm8, xmm, r32
    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[1], 0)
            m.emit(0xc5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
            m.emit(0x79)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRW imm8, xmm, m16
    if isImm8(v0) && isXMM(v1) && isM16(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
            m.emit(0x15)
            m.mrsd(lcode(v[1]), addr(v[2]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRW imm8, xmm, r32
    if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit(0x08)
            m.emit(0xc5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPEXTRW imm8, xmm, m16
    if isImm8(v0) && isEVEXXMM(v1) && isM16(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
            m.emit(0x15)
            m.mrsd(lcode(v[1]), addr(v[2]), 2)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPEXTRW")
    }
    return p
}

// VPGATHERDD performs "Gather Packed Doubleword Values Using Signed Doubleword Indices".
//
// Mnemonic : VPGATHERDD
// Supported forms : (5 forms)
//
// * VPGATHERDD xmm, vm32x, xmm [AVX2]
// * VPGATHERDD ymm, vm32y, ymm [AVX2]
// * VPGATHERDD vm32z, zmm{k} [AVX512F]
// * VPGATHERDD vm32x, xmm{k} [AVX512F,AVX512VL]
// * VPGATHERDD vm32y, ymm{k} [AVX512F,AVX512VL]
//
func (self *Program) VPGATHERDD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VPGATHERDD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VPGATHERDD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VPGATHERDD takes 2 or 3 operands")
    }
    // VPGATHERDD xmm, vm32x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x90)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERDD ymm, vm32y, ymm
    if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x90)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERDD vm32z, zmm{k}
    if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPGATHERDD vm32x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPGATHERDD vm32y, ymm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPGATHERDD")
    }
    return p
}
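
// Hand-written usage sketch (not generated): VPGATHERDD is variadic because
// the AVX2 forms take three operands (mask vector, VSIB memory, destination)
// while the AVX512 forms take two (VSIB memory, opmask-qualified destination).
// The calls below are illustrative only; how a vm32x/vm32y VSIB operand and a
// {k}-masked register are built is defined elsewhere in this package, so
// treat vm and maskedYmm as placeholders rather than real identifiers.
//
//    p.VPGATHERDD(XMM1, vm, XMM0) // AVX2: XMM1 is the per-lane mask vector
//    p.VPGATHERDD(vm, maskedYmm)  // AVX512VL: the destination carries the {k} opmask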

// VPGATHERDQ performs "Gather Packed Quadword Values Using Signed Doubleword Indices".
//
// Mnemonic : VPGATHERDQ
// Supported forms : (5 forms)
//
// * VPGATHERDQ xmm, vm32x, xmm [AVX2]
// * VPGATHERDQ ymm, vm32x, ymm [AVX2]
// * VPGATHERDQ vm32y, zmm{k} [AVX512F]
// * VPGATHERDQ vm32x, xmm{k} [AVX512F,AVX512VL]
// * VPGATHERDQ vm32x, ymm{k} [AVX512F,AVX512VL]
//
func (self *Program) VPGATHERDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VPGATHERDQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VPGATHERDQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VPGATHERDQ takes 2 or 3 operands")
    }
    // VPGATHERDQ xmm, vm32x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x90)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERDQ ymm, vm32x, ymm
    if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x90)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERDQ vm32y, zmm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPGATHERDQ vm32x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPGATHERDQ vm32x, ymm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x90)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPGATHERDQ")
    }
    return p
}

// VPGATHERQD performs "Gather Packed Doubleword Values Using Signed Quadword Indices".
//
// Mnemonic : VPGATHERQD
// Supported forms : (5 forms)
//
// * VPGATHERQD xmm, vm64x, xmm [AVX2]
// * VPGATHERQD xmm, vm64y, xmm [AVX2]
// * VPGATHERQD vm64z, ymm{k} [AVX512F]
// * VPGATHERQD vm64x, xmm{k} [AVX512F,AVX512VL]
// * VPGATHERQD vm64y, xmm{k} [AVX512F,AVX512VL]
//
func (self *Program) VPGATHERQD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VPGATHERQD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VPGATHERQD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VPGATHERQD takes 2 or 3 operands")
    }
    // VPGATHERQD xmm, vm64x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x91)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERQD xmm, vm64y, xmm
    if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x91)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERQD vm64z, ymm{k}
    if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x91)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPGATHERQD vm64x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x91)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPGATHERQD vm64y, xmm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x91)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPGATHERQD")
    }
    return p
}

// VPGATHERQQ performs "Gather Packed Quadword Values Using Signed Quadword Indices".
//
// Mnemonic : VPGATHERQQ
// Supported forms : (5 forms)
//
// * VPGATHERQQ xmm, vm64x, xmm [AVX2]
// * VPGATHERQQ ymm, vm64y, ymm [AVX2]
// * VPGATHERQQ vm64z, zmm{k} [AVX512F]
// * VPGATHERQQ vm64x, xmm{k} [AVX512F,AVX512VL]
// * VPGATHERQQ vm64y, ymm{k} [AVX512F,AVX512VL]
//
func (self *Program) VPGATHERQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VPGATHERQQ", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VPGATHERQQ", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VPGATHERQQ takes 2 or 3 operands")
    }
    // VPGATHERQQ xmm, vm64x, xmm
    if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x91)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERQQ ymm, vm64y, ymm
    if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x91)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    // VPGATHERQQ vm64z, zmm{k}
    if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x91)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPGATHERQQ vm64x, xmm{k}
    if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x91)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPGATHERQQ vm64y, ymm{k}
    if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
            m.emit(0x91)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPGATHERQQ")
    }
    return p
}

// VPHADDBD performs "Packed Horizontal Add Signed Byte to Signed Doubleword".
//
// Mnemonic : VPHADDBD
// Supported forms : (2 forms)
//
// * VPHADDBD xmm, xmm [XOP]
// * VPHADDBD m128, xmm [XOP]
//
func (self *Program) VPHADDBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDBD", 2, Operands { v0, v1 })
    // VPHADDBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDBD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xc2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDBD")
    }
    return p
}

// VPHADDBQ performs "Packed Horizontal Add Signed Byte to Signed Quadword".
//
// Mnemonic : VPHADDBQ
// Supported forms : (2 forms)
//
// * VPHADDBQ xmm, xmm [XOP]
// * VPHADDBQ m128, xmm [XOP]
//
func (self *Program) VPHADDBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDBQ", 2, Operands { v0, v1 })
    // VPHADDBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xc3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDBQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xc3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDBQ")
    }
    return p
}

// VPHADDBW performs "Packed Horizontal Add Signed Byte to Signed Word".
//
// Mnemonic : VPHADDBW
// Supported forms : (2 forms)
//
// * VPHADDBW xmm, xmm [XOP]
// * VPHADDBW m128, xmm [XOP]
//
func (self *Program) VPHADDBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDBW", 2, Operands { v0, v1 })
    // VPHADDBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xc1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xc1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDBW")
    }
    return p
}
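
// Hand-written usage sketch (not generated): the VPHADD[U]B*/W*/D* two-operand
// horizontal adds above belong to AMD's XOP extension, which is why they are
// tagged DomainAMDSpecific and gated on ISA_XOP; they only assemble for the
// AMD CPU generations that report XOP support. The register constants below
// are assumed to be this package's exported XMM definitions.
//
//    p.VPHADDBW(XMM1, XMM0) // widen the bytes of XMM1 and pairwise-add them into words in XMM0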

// VPHADDD performs "Packed Horizontal Add Doubleword Integer".
//
// Mnemonic : VPHADDD
// Supported forms : (4 forms)
//
// * VPHADDD xmm, xmm, xmm [AVX]
// * VPHADDD m128, xmm, xmm [AVX]
// * VPHADDD ymm, ymm, ymm [AVX2]
// * VPHADDD m256, ymm, ymm [AVX2]
//
func (self *Program) VPHADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPHADDD", 3, Operands { v0, v1, v2 })
    // VPHADDD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x02)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPHADDD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x02)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x02)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDD")
    }
    return p
}
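
// exampleVPHADDD is a hand-written sketch, not generated output: the AVX
// horizontal adds use the three-operand order src2, src1, dst, mirroring the
// "VPHADDD xmm, xmm, xmm" form documented above. XMM0..XMM2 are assumed to
// be this package's exported register constants.
func exampleVPHADDD(p *Program) {
    p.VPHADDD(XMM2, XMM1, XMM0) // XMM0 = pairwise dword sums: low half from XMM1, high half from XMM2
}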

// VPHADDDQ performs "Packed Horizontal Add Signed Doubleword to Signed Quadword".
//
// Mnemonic : VPHADDDQ
// Supported forms : (2 forms)
//
// * VPHADDDQ xmm, xmm [XOP]
// * VPHADDDQ m128, xmm [XOP]
//
func (self *Program) VPHADDDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDDQ", 2, Operands { v0, v1 })
    // VPHADDDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xcb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xcb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDDQ")
    }
    return p
}

// VPHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation".
//
// Mnemonic : VPHADDSW
// Supported forms : (4 forms)
//
// * VPHADDSW xmm, xmm, xmm [AVX]
// * VPHADDSW m128, xmm, xmm [AVX]
// * VPHADDSW ymm, ymm, ymm [AVX2]
// * VPHADDSW m256, ymm, ymm [AVX2]
//
func (self *Program) VPHADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPHADDSW", 3, Operands { v0, v1, v2 })
    // VPHADDSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPHADDSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x03)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x03)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDSW")
    }
    return p
}

// VPHADDUBD performs "Packed Horizontal Add Unsigned Byte to Doubleword".
//
// Mnemonic : VPHADDUBD
// Supported forms : (2 forms)
//
// * VPHADDUBD xmm, xmm [XOP]
// * VPHADDUBD m128, xmm [XOP]
//
func (self *Program) VPHADDUBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDUBD", 2, Operands { v0, v1 })
    // VPHADDUBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDUBD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xd2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDUBD")
    }
    return p
}

// VPHADDUBQ performs "Packed Horizontal Add Unsigned Byte to Quadword".
//
// Mnemonic : VPHADDUBQ
// Supported forms : (2 forms)
//
// * VPHADDUBQ xmm, xmm [XOP]
// * VPHADDUBQ m128, xmm [XOP]
//
func (self *Program) VPHADDUBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDUBQ", 2, Operands { v0, v1 })
    // VPHADDUBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDUBQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xd3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDUBQ")
    }
    return p
}

// VPHADDUBW performs "Packed Horizontal Add Unsigned Byte to Word".
//
// Mnemonic : VPHADDUBW
// Supported forms : (2 forms)
//
// * VPHADDUBW xmm, xmm [XOP]
// * VPHADDUBW m128, xmm [XOP]
//
func (self *Program) VPHADDUBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDUBW", 2, Operands { v0, v1 })
    // VPHADDUBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDUBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xd1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDUBW")
    }
    return p
}

// VPHADDUDQ performs "Packed Horizontal Add Unsigned Doubleword to Quadword".
//
// Mnemonic : VPHADDUDQ
// Supported forms : (2 forms)
//
// * VPHADDUDQ xmm, xmm [XOP]
// * VPHADDUDQ m128, xmm [XOP]
//
func (self *Program) VPHADDUDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDUDQ", 2, Operands { v0, v1 })
    // VPHADDUDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xdb)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDUDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xdb)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDUDQ")
    }
    return p
}

// VPHADDUWD performs "Packed Horizontal Add Unsigned Word to Doubleword".
//
// Mnemonic : VPHADDUWD
// Supported forms : (2 forms)
//
// * VPHADDUWD xmm, xmm [XOP]
// * VPHADDUWD m128, xmm [XOP]
//
func (self *Program) VPHADDUWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDUWD", 2, Operands { v0, v1 })
    // VPHADDUWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xd6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDUWD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xd6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDUWD")
    }
    return p
}

// VPHADDUWQ performs "Packed Horizontal Add Unsigned Word to Quadword".
//
// Mnemonic : VPHADDUWQ
// Supported forms : (2 forms)
//
// * VPHADDUWQ xmm, xmm [XOP]
// * VPHADDUWQ m128, xmm [XOP]
//
func (self *Program) VPHADDUWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDUWQ", 2, Operands { v0, v1 })
    // VPHADDUWQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xd7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDUWQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xd7)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDUWQ")
    }
    return p
}

// VPHADDW performs "Packed Horizontal Add Word Integers".
//
// Mnemonic : VPHADDW
// Supported forms : (4 forms)
//
// * VPHADDW xmm, xmm, xmm [AVX]
// * VPHADDW m128, xmm, xmm [AVX]
// * VPHADDW ymm, ymm, ymm [AVX2]
// * VPHADDW m256, ymm, ymm [AVX2]
//
func (self *Program) VPHADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPHADDW", 3, Operands { v0, v1, v2 })
    // VPHADDW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPHADDW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x01)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x01)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDW")
    }
    return p
}

// VPHADDWD performs "Packed Horizontal Add Signed Word to Signed Doubleword".
//
// Mnemonic : VPHADDWD
// Supported forms : (2 forms)
//
// * VPHADDWD xmm, xmm [XOP]
// * VPHADDWD m128, xmm [XOP]
//
func (self *Program) VPHADDWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDWD", 2, Operands { v0, v1 })
    // VPHADDWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDWD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xc6)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDWD")
    }
    return p
}

// VPHADDWQ performs "Packed Horizontal Add Signed Word to Signed Quadword".
//
// Mnemonic : VPHADDWQ
// Supported forms : (2 forms)
//
// * VPHADDWQ xmm, xmm [XOP]
// * VPHADDWQ m128, xmm [XOP]
//
func (self *Program) VPHADDWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHADDWQ", 2, Operands { v0, v1 })
    // VPHADDWQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xc7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHADDWQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xc7)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHADDWQ")
    }
    return p
}

// VPHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers".
//
// Mnemonic : VPHMINPOSUW
// Supported forms : (2 forms)
//
// * VPHMINPOSUW xmm, xmm [AVX]
// * VPHMINPOSUW m128, xmm [AVX]
//
func (self *Program) VPHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHMINPOSUW", 2, Operands { v0, v1 })
    // VPHMINPOSUW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x41)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHMINPOSUW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x41)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHMINPOSUW")
    }
    return p
}

// VPHSUBBW performs "Packed Horizontal Subtract Signed Byte to Signed Word".
//
// Mnemonic : VPHSUBBW
// Supported forms : (2 forms)
//
// * VPHSUBBW xmm, xmm [XOP]
// * VPHSUBBW m128, xmm [XOP]
//
func (self *Program) VPHSUBBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHSUBBW", 2, Operands { v0, v1 })
    // VPHSUBBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xe1)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBBW m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe1)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHSUBBW")
    }
    return p
}

// VPHSUBD performs "Packed Horizontal Subtract Doubleword Integers".
//
// Mnemonic : VPHSUBD
// Supported forms : (4 forms)
//
// * VPHSUBD xmm, xmm, xmm [AVX]
// * VPHSUBD m128, xmm, xmm [AVX]
// * VPHSUBD ymm, ymm, ymm [AVX2]
// * VPHSUBD m256, ymm, ymm [AVX2]
//
func (self *Program) VPHSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPHSUBD", 3, Operands { v0, v1, v2 })
    // VPHSUBD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x06)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x06)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPHSUBD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x06)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x06)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHSUBD")
    }
    return p
}

// VPHSUBDQ performs "Packed Horizontal Subtract Signed Doubleword to Signed Quadword".
//
// Mnemonic : VPHSUBDQ
// Supported forms : (2 forms)
//
// * VPHSUBDQ xmm, xmm [XOP]
// * VPHSUBDQ m128, xmm [XOP]
//
func (self *Program) VPHSUBDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHSUBDQ", 2, Operands { v0, v1 })
    // VPHSUBDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xe3)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBDQ m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe3)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHSUBDQ")
    }
    return p
}

// VPHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation".
//
// Mnemonic : VPHSUBSW
// Supported forms : (4 forms)
//
// * VPHSUBSW xmm, xmm, xmm [AVX]
// * VPHSUBSW m128, xmm, xmm [AVX]
// * VPHSUBSW ymm, ymm, ymm [AVX2]
// * VPHSUBSW m256, ymm, ymm [AVX2]
//
func (self *Program) VPHSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPHSUBSW", 3, Operands { v0, v1, v2 })
    // VPHSUBSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x07)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x07)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPHSUBSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x07)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x07)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHSUBSW")
    }
    return p
}

// VPHSUBW performs "Packed Horizontal Subtract Word Integers".
//
// Mnemonic : VPHSUBW
// Supported forms : (4 forms)
//
// * VPHSUBW xmm, xmm, xmm [AVX]
// * VPHSUBW m128, xmm, xmm [AVX]
// * VPHSUBW ymm, ymm, ymm [AVX2]
// * VPHSUBW m256, ymm, ymm [AVX2]
//
func (self *Program) VPHSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPHSUBW", 3, Operands { v0, v1, v2 })
    // VPHSUBW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPHSUBW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x05)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x05)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHSUBW")
    }
    return p
}

// VPHSUBWD performs "Packed Horizontal Subtract Signed Word to Signed Doubleword".
//
// Mnemonic : VPHSUBWD
// Supported forms : (2 forms)
//
// * VPHSUBWD xmm, xmm [XOP]
// * VPHSUBWD m128, xmm [XOP]
//
func (self *Program) VPHSUBWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPHSUBWD", 2, Operands { v0, v1 })
    // VPHSUBWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x78)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPHSUBWD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPHSUBWD")
    }
    return p
}

// VPINSRB performs "Insert Byte".
//
// Mnemonic : VPINSRB
// Supported forms : (4 forms)
//
// * VPINSRB imm8, r32, xmm, xmm [AVX]
// * VPINSRB imm8, m8, xmm, xmm [AVX]
// * VPINSRB imm8, r32, xmm, xmm [AVX512BW]
// * VPINSRB imm8, m8, xmm, xmm [AVX512BW]
//
func (self *Program) VPINSRB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPINSRB", 4, Operands { v0, v1, v2, v3 })
    // VPINSRB imm8, r32, xmm, xmm
    if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRB imm8, m8, xmm, xmm
    if isImm8(v0) && isM8(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x20)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRB imm8, r32, xmm, xmm
    if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRB imm8, m8, xmm, xmm
    if isImm8(v0) && isM8(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x20)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPINSRB")
    }
    return p
}

// VPINSRD performs "Insert Doubleword".
//
// Mnemonic : VPINSRD
// Supported forms : (4 forms)
//
// * VPINSRD imm8, r32, xmm, xmm [AVX]
// * VPINSRD imm8, m32, xmm, xmm [AVX]
// * VPINSRD imm8, r32, xmm, xmm [AVX512DQ]
// * VPINSRD imm8, m32, xmm, xmm [AVX512DQ]
//
func (self *Program) VPINSRD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPINSRD", 4, Operands { v0, v1, v2, v3 })
    // VPINSRD imm8, r32, xmm, xmm
    if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRD imm8, m32, xmm, xmm
    if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x22)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRD imm8, r32, xmm, xmm
    if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRD imm8, m32, xmm, xmm
    if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x22)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPINSRD")
    }
    return p
}
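
// exampleVPINSRD is a hand-written sketch, not generated output: the insert
// family reads imm8 first, then the scalar source, then the vector source,
// then the destination, so this call writes EAX into dword lane 3 of XMM0
// while copying the other lanes from XMM1. The register constants are assumed
// to be this package's exported definitions.
func exampleVPINSRD(p *Program) {
    p.VPINSRD(3, EAX, XMM1, XMM0) // imm8 selects the destination lane
}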
|
|
|
|
// VPINSRQ performs "Insert Quadword".
|
|
//
|
|
// Mnemonic : VPINSRQ
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * VPINSRQ imm8, r64, xmm, xmm [AVX]
|
|
// * VPINSRQ imm8, m64, xmm, xmm [AVX]
|
|
// * VPINSRQ imm8, r64, xmm, xmm [AVX512DQ]
|
|
// * VPINSRQ imm8, m64, xmm, xmm [AVX512DQ]
|
|
//
|
|
func (self *Program) VPINSRQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
|
|
p := self.alloc("VPINSRQ", 4, Operands { v0, v1, v2, v3 })
|
|
// VPINSRQ imm8, r64, xmm, xmm
|
|
if isImm8(v0) && isReg64(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0xf9 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x22)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPINSRQ imm8, m64, xmm, xmm
|
|
if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x22)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPINSRQ imm8, r64, xmm, xmm
|
|
if isImm8(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
|
|
self.require(ISA_AVX512DQ)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
|
|
m.emit(0xfd ^ (hlcode(v[2]) << 3))
|
|
m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
|
|
m.emit(0x22)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPINSRQ imm8, m64, xmm, xmm
|
|
if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
|
|
self.require(ISA_AVX512DQ)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
|
|
m.emit(0x22)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 8)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VPINSRQ")
|
|
}
|
|
return p
|
|
}

// VPINSRW performs "Insert Word".
//
// Mnemonic : VPINSRW
// Supported forms : (4 forms)
//
// * VPINSRW imm8, r32, xmm, xmm [AVX]
// * VPINSRW imm8, m16, xmm, xmm [AVX]
// * VPINSRW imm8, r32, xmm, xmm [AVX512BW]
// * VPINSRW imm8, m16, xmm, xmm [AVX512BW]
//
func (self *Program) VPINSRW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPINSRW", 4, Operands { v0, v1, v2, v3 })
    // VPINSRW imm8, r32, xmm, xmm
    if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRW imm8, m16, xmm, xmm
    if isImm8(v0) && isM16(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc4)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRW imm8, r32, xmm, xmm
    if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
            m.emit(0xc4)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPINSRW imm8, m16, xmm, xmm
    if isImm8(v0) && isM16(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0xc4)
            m.mrsd(lcode(v[3]), addr(v[1]), 2)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPINSRW")
    }
    return p
}
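
// Usage sketch (same conventions; the r32 source contributes its low word):
//
//     p.VPINSRW(3, EAX, XMM1, XMM2)    // xmm2 = xmm1 with word 3 replaced by the low word of eax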

// VPLZCNTD performs "Count the Number of Leading Zero Bits for Packed Doubleword Values".
//
// Mnemonic : VPLZCNTD
// Supported forms : (6 forms)
//
// * VPLZCNTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTD m512/m32bcst, zmm{k}{z} [AVX512CD]
// * VPLZCNTD xmm, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTD ymm, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTD zmm, zmm{k}{z} [AVX512CD]
//
func (self *Program) VPLZCNTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPLZCNTD", 2, Operands { v0, v1 })
    // VPLZCNTD m128/m32bcst, xmm{k}{z}
    if isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPLZCNTD m256/m32bcst, ymm{k}{z}
    if isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPLZCNTD m512/m32bcst, zmm{k}{z}
    if isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPLZCNTD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPLZCNTD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPLZCNTD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPLZCNTD")
    }
    return p
}
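
// Usage sketch for the two-operand leading-zero-count forms (VPLZCNTD/Q take
// source first, destination last; the optional {k}{z} masking is expressed
// through this package's masked-register operands, not shown here):
//
//     p.VPLZCNTD(XMM1, XMM2)    // xmm2 = per-dword leading-zero counts of xmm1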

// VPLZCNTQ performs "Count the Number of Leading Zero Bits for Packed Quadword Values".
//
// Mnemonic : VPLZCNTQ
// Supported forms : (6 forms)
//
// * VPLZCNTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTQ m512/m64bcst, zmm{k}{z} [AVX512CD]
// * VPLZCNTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL]
// * VPLZCNTQ zmm, zmm{k}{z} [AVX512CD]
//
func (self *Program) VPLZCNTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPLZCNTQ", 2, Operands { v0, v1 })
    // VPLZCNTQ m128/m64bcst, xmm{k}{z}
    if isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPLZCNTQ m256/m64bcst, ymm{k}{z}
    if isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPLZCNTQ m512/m64bcst, zmm{k}{z}
    if isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x44)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPLZCNTQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPLZCNTQ ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPLZCNTQ zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512CD)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x44)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPLZCNTQ")
    }
    return p
}

// VPMACSDD performs "Packed Multiply Accumulate Signed Doubleword to Signed Doubleword".
//
// Mnemonic : VPMACSDD
// Supported forms : (2 forms)
//
// * VPMACSDD xmm, xmm, xmm, xmm [XOP]
// * VPMACSDD xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSDD", 4, Operands { v0, v1, v2, v3 })
    // VPMACSDD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x9e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSDD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x9e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSDD")
    }
    return p
}
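
// Usage sketch for the XOP multiply-accumulate family (VPMACS*/VPMADCS*),
// following the operand ordering of the "Supported forms" comments: the
// first operand is the addend (encoded in the is4 byte above), the middle
// two are the multiplicands, and the last is the destination:
//
//     p.VPMACSDD(XMM0, XMM1, XMM2, XMM3)    // xmm3 = xmm2*xmm1 + xmm0, per dword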

// VPMACSDQH performs "Packed Multiply Accumulate Signed High Doubleword to Signed Quadword".
//
// Mnemonic : VPMACSDQH
// Supported forms : (2 forms)
//
// * VPMACSDQH xmm, xmm, xmm, xmm [XOP]
// * VPMACSDQH xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSDQH", 4, Operands { v0, v1, v2, v3 })
    // VPMACSDQH xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x9f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSDQH xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x9f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSDQH")
    }
    return p
}

// VPMACSDQL performs "Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword".
//
// Mnemonic : VPMACSDQL
// Supported forms : (2 forms)
//
// * VPMACSDQL xmm, xmm, xmm, xmm [XOP]
// * VPMACSDQL xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSDQL", 4, Operands { v0, v1, v2, v3 })
    // VPMACSDQL xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSDQL xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x97)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSDQL")
    }
    return p
}

// VPMACSSDD performs "Packed Multiply Accumulate with Saturation Signed Doubleword to Signed Doubleword".
//
// Mnemonic : VPMACSSDD
// Supported forms : (2 forms)
//
// * VPMACSSDD xmm, xmm, xmm, xmm [XOP]
// * VPMACSSDD xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSSDD", 4, Operands { v0, v1, v2, v3 })
    // VPMACSSDD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x8e)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSSDD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x8e)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSSDD")
    }
    return p
}

// VPMACSSDQH performs "Packed Multiply Accumulate with Saturation Signed High Doubleword to Signed Quadword".
//
// Mnemonic : VPMACSSDQH
// Supported forms : (2 forms)
//
// * VPMACSSDQH xmm, xmm, xmm, xmm [XOP]
// * VPMACSSDQH xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSSDQH", 4, Operands { v0, v1, v2, v3 })
    // VPMACSSDQH xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x8f)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSSDQH xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x8f)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSSDQH")
    }
    return p
}

// VPMACSSDQL performs "Packed Multiply Accumulate with Saturation Signed Low Doubleword to Signed Quadword".
//
// Mnemonic : VPMACSSDQL
// Supported forms : (2 forms)
//
// * VPMACSSDQL xmm, xmm, xmm, xmm [XOP]
// * VPMACSSDQL xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSSDQL", 4, Operands { v0, v1, v2, v3 })
    // VPMACSSDQL xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSSDQL xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x87)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSSDQL")
    }
    return p
}

// VPMACSSWD performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Doubleword".
//
// Mnemonic : VPMACSSWD
// Supported forms : (2 forms)
//
// * VPMACSSWD xmm, xmm, xmm, xmm [XOP]
// * VPMACSSWD xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSSWD", 4, Operands { v0, v1, v2, v3 })
    // VPMACSSWD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x86)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSSWD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x86)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSSWD")
    }
    return p
}

// VPMACSSWW performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Word".
//
// Mnemonic : VPMACSSWW
// Supported forms : (2 forms)
//
// * VPMACSSWW xmm, xmm, xmm, xmm [XOP]
// * VPMACSSWW xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSSWW", 4, Operands { v0, v1, v2, v3 })
    // VPMACSSWW xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x85)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSSWW xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x85)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSSWW")
    }
    return p
}

// VPMACSWD performs "Packed Multiply Accumulate Signed Word to Signed Doubleword".
//
// Mnemonic : VPMACSWD
// Supported forms : (2 forms)
//
// * VPMACSWD xmm, xmm, xmm, xmm [XOP]
// * VPMACSWD xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSWD", 4, Operands { v0, v1, v2, v3 })
    // VPMACSWD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x96)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSWD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x96)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSWD")
    }
    return p
}

// VPMACSWW performs "Packed Multiply Accumulate Signed Word to Signed Word".
//
// Mnemonic : VPMACSWW
// Supported forms : (2 forms)
//
// * VPMACSWW xmm, xmm, xmm, xmm [XOP]
// * VPMACSWW xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMACSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMACSWW", 4, Operands { v0, v1, v2, v3 })
    // VPMACSWW xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0x95)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMACSWW xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x95)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMACSWW")
    }
    return p
}

// VPMADCSSWD performs "Packed Multiply Add Accumulate with Saturation Signed Word to Signed Doubleword".
//
// Mnemonic : VPMADCSSWD
// Supported forms : (2 forms)
//
// * VPMADCSSWD xmm, xmm, xmm, xmm [XOP]
// * VPMADCSSWD xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMADCSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMADCSSWD", 4, Operands { v0, v1, v2, v3 })
    // VPMADCSSWD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xa6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMADCSSWD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xa6)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMADCSSWD")
    }
    return p
}

// VPMADCSWD performs "Packed Multiply Add Accumulate Signed Word to Signed Doubleword".
//
// Mnemonic : VPMADCSWD
// Supported forms : (2 forms)
//
// * VPMADCSWD xmm, xmm, xmm, xmm [XOP]
// * VPMADCSWD xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPMADCSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPMADCSWD", 4, Operands { v0, v1, v2, v3 })
    // VPMADCSWD xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xb6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
    }
    // VPMADCSWD xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xb6)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMADCSWD")
    }
    return p
}

// VPMADD52HUQ performs "Packed Multiply of Unsigned 52-bit Integers and Add High 52-bit Products to Quadword Accumulators".
//
// Mnemonic : VPMADD52HUQ
// Supported forms : (6 forms)
//
// * VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52HUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52HUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA]
// * VPMADD52HUQ zmm, zmm, zmm{k}{z} [AVX512IFMA]
//
func (self *Program) VPMADD52HUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMADD52HUQ", 3, Operands { v0, v1, v2 })
    // VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb5)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMADD52HUQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xb5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb5)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMADD52HUQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xb5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512IFMA)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb5)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMADD52HUQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512IFMA)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMADD52HUQ")
    }
    return p
}
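
// Usage sketch for the IFMA forms (destination/accumulator last; all three
// operands share one vector width):
//
//     p.VPMADD52HUQ(ZMM0, ZMM1, ZMM2)    // zmm2 += high 52 bits of zmm1*zmm0, per qword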

// VPMADD52LUQ performs "Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Quadword Accumulators".
//
// Mnemonic : VPMADD52LUQ
// Supported forms : (6 forms)
//
// * VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52LUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52LUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
// * VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA]
// * VPMADD52LUQ zmm, zmm, zmm{k}{z} [AVX512IFMA]
//
func (self *Program) VPMADD52LUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMADD52LUQ", 3, Operands { v0, v1, v2 })
    // VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb4)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMADD52LUQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xb4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb4)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMADD52LUQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512IFMA | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xb4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512IFMA)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xb4)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMADD52LUQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512IFMA)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xb4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMADD52LUQ")
    }
    return p
}

// VPMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers".
//
// Mnemonic : VPMADDUBSW
// Supported forms : (10 forms)
//
// * VPMADDUBSW xmm, xmm, xmm [AVX]
// * VPMADDUBSW m128, xmm, xmm [AVX]
// * VPMADDUBSW ymm, ymm, ymm [AVX2]
// * VPMADDUBSW m256, ymm, ymm [AVX2]
// * VPMADDUBSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMADDUBSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMADDUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMADDUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMADDUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMADDUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMADDUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMADDUBSW", 3, Operands { v0, v1, v2 })
    // VPMADDUBSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDUBSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMADDUBSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDUBSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMADDUBSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDUBSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMADDUBSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDUBSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMADDUBSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x04)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDUBSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x04)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMADDUBSW")
    }
    return p
}
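
// Usage sketch (AVX form; destination last, adjacent u8*s8 products are
// summed pairwise into saturated signed words):
//
//     p.VPMADDUBSW(XMM0, XMM1, XMM2)    // xmm2 = satw(xmm1[u8]*xmm0[s8] pair sums)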

// VPMADDWD performs "Multiply and Add Packed Signed Word Integers".
//
// Mnemonic : VPMADDWD
// Supported forms : (10 forms)
//
// * VPMADDWD xmm, xmm, xmm [AVX]
// * VPMADDWD m128, xmm, xmm [AVX]
// * VPMADDWD ymm, ymm, ymm [AVX2]
// * VPMADDWD m256, ymm, ymm [AVX2]
// * VPMADDWD zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMADDWD m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMADDWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMADDWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMADDWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMADDWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMADDWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMADDWD", 3, Operands { v0, v1, v2 })
    // VPMADDWD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDWD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMADDWD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDWD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMADDWD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDWD m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMADDWD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDWD m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMADDWD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMADDWD m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf5)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMADDWD")
    }
    return p
}
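
// Usage sketch (a common dot-product building block; destination last):
//
//     p.VPMADDWD(YMM0, YMM1, YMM2)    // ymm2 = adjacent s16*s16 product pairs of ymm1/ymm0, summed to s32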

// VPMASKMOVD performs "Conditional Move Packed Doubleword Integers".
//
// Mnemonic : VPMASKMOVD
// Supported forms : (4 forms)
//
// * VPMASKMOVD m128, xmm, xmm [AVX2]
// * VPMASKMOVD m256, ymm, ymm [AVX2]
// * VPMASKMOVD xmm, xmm, m128 [AVX2]
// * VPMASKMOVD ymm, ymm, m256 [AVX2]
//
func (self *Program) VPMASKMOVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMASKMOVD", 3, Operands { v0, v1, v2 })
    // VPMASKMOVD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x8c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMASKMOVD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x8c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMASKMOVD xmm, xmm, m128
    if isXMM(v0) && isXMM(v1) && isM128(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x8e)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    // VPMASKMOVD ymm, ymm, m256
    if isYMM(v0) && isYMM(v1) && isM256(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x8e)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMASKMOVD")
    }
    return p
}
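
// Usage sketch: every VPMASKMOV form touches memory, so one operand must be
// a memory operand built with this package's addressing helpers (spelled
// below as a hypothetical Ptr(reg, disp) purely for illustration):
//
//     p.VPMASKMOVD(Ptr(RSI, 0), XMM1, XMM2)    // masked load:  xmm2 <- [rsi] under mask xmm1
//     p.VPMASKMOVD(XMM2, XMM1, Ptr(RDI, 0))    // masked store: [rdi] <- xmm2 under mask xmm1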

// VPMASKMOVQ performs "Conditional Move Packed Quadword Integers".
//
// Mnemonic : VPMASKMOVQ
// Supported forms : (4 forms)
//
// * VPMASKMOVQ m128, xmm, xmm [AVX2]
// * VPMASKMOVQ m256, ymm, ymm [AVX2]
// * VPMASKMOVQ xmm, xmm, m128 [AVX2]
// * VPMASKMOVQ ymm, ymm, m256 [AVX2]
//
func (self *Program) VPMASKMOVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMASKMOVQ", 3, Operands { v0, v1, v2 })
    // VPMASKMOVQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x8c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMASKMOVQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x8c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMASKMOVQ xmm, xmm, m128
    if isXMM(v0) && isXMM(v1) && isM128(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x8e)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    // VPMASKMOVQ ymm, ymm, m256
    if isYMM(v0) && isYMM(v1) && isM256(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[0]), addr(v[2]), hlcode(v[1]))
            m.emit(0x8e)
            m.mrsd(lcode(v[0]), addr(v[2]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMASKMOVQ")
    }
    return p
}

// VPMAXSB performs "Maximum of Packed Signed Byte Integers".
//
// Mnemonic : VPMAXSB
// Supported forms : (10 forms)
//
// * VPMAXSB xmm, xmm, xmm [AVX]
// * VPMAXSB m128, xmm, xmm [AVX]
// * VPMAXSB ymm, ymm, ymm [AVX2]
// * VPMAXSB m256, ymm, ymm [AVX2]
// * VPMAXSB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXSB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMAXSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXSB", 3, Operands { v0, v1, v2 })
    // VPMAXSB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x3c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXSB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x3c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXSB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXSB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXSB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXSB")
    }
    return p
}
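
// Usage sketch for the packed-maximum family (the VPMAXS*/VPMAXU* methods all
// share this shape; destination last):
//
//     p.VPMAXSB(XMM0, XMM1, XMM2)    // xmm2 = per-byte signed max of xmm1 and xmm0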

// VPMAXSD performs "Maximum of Packed Signed Doubleword Integers".
//
// Mnemonic : VPMAXSD
// Supported forms : (10 forms)
//
// * VPMAXSD xmm, xmm, xmm [AVX]
// * VPMAXSD m128, xmm, xmm [AVX]
// * VPMAXSD ymm, ymm, ymm [AVX2]
// * VPMAXSD m256, ymm, ymm [AVX2]
// * VPMAXSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMAXSD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMAXSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMAXSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMAXSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXSD", 3, Operands { v0, v1, v2 })
    // VPMAXSD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXSD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXSD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXSD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXSD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMAXSD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXSD")
    }
    return p
}

// VPMAXSQ performs "Maximum of Packed Signed Quadword Integers".
//
// Mnemonic : VPMAXSQ
// Supported forms : (6 forms)
//
// * VPMAXSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMAXSQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMAXSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMAXSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMAXSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXSQ", 3, Operands { v0, v1, v2 })
    // VPMAXSQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXSQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXSQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMAXSQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXSQ")
    }
    return p
}
|
|
|
|
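// Usage sketch for VPMAXSQ (illustrative, assuming this package's exported
// ZMM register constants): operands are listed source-first, destination-last,
// with v1 supplying the EVEX.vvvv source and v2 the ModRM.reg destination, so
//
//     p.VPMAXSQ(ZMM0, ZMM1, ZMM2) // zmm2 = per-lane signed int64 max of zmm1, zmm0
//
// selects the "VPMAXSQ zmm, zmm, zmm{k}{z}" form above and requires AVX512F.
//
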
// VPMAXSW performs "Maximum of Packed Signed Word Integers".
//
// Mnemonic : VPMAXSW
// Supported forms : (10 forms)
//
// * VPMAXSW xmm, xmm, xmm [AVX]
// * VPMAXSW m128, xmm, xmm [AVX]
// * VPMAXSW ymm, ymm, ymm [AVX2]
// * VPMAXSW m256, ymm, ymm [AVX2]
// * VPMAXSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMAXSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXSW", 3, Operands { v0, v1, v2 })
    // VPMAXSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xee)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xee)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xee)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xee)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xee)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xee)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXSW")
    }
    return p
}

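// Usage sketch for VPMAXSW (illustrative, assuming this package's exported
// XMM register constants): plain 128-bit registers match the AVX form above,
// which emits the short two-byte VEX prefix via m.vex2 instead of EVEX:
//
//     p.VPMAXSW(XMM0, XMM1, XMM2) // VEX.128.66.0F.WIG EE /r, requires AVX
//
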
// VPMAXUB performs "Maximum of Packed Unsigned Byte Integers".
//
// Mnemonic : VPMAXUB
// Supported forms : (10 forms)
//
// * VPMAXUB xmm, xmm, xmm [AVX]
// * VPMAXUB m128, xmm, xmm [AVX]
// * VPMAXUB ymm, ymm, ymm [AVX2]
// * VPMAXUB m256, ymm, ymm [AVX2]
// * VPMAXUB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXUB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMAXUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXUB", 3, Operands { v0, v1, v2 })
    // VPMAXUB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xde)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXUB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xde)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXUB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xde)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXUB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xde)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXUB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xde)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xde)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXUB")
    }
    return p
}

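// Memory-form sketch for VPMAXUB (illustrative; Ptr and RSI are assumed here
// to be this package's memory-operand constructor and 64-bit register
// constant, as in iasm): a 128-bit memory source selects the m128 form above
// and is still passed source-first:
//
//     p.VPMAXUB(Ptr(RSI, 0), XMM1, XMM2) // xmm2 = per-byte unsigned max of xmm1, [rsi]
//
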
// VPMAXUD performs "Maximum of Packed Unsigned Doubleword Integers".
//
// Mnemonic : VPMAXUD
// Supported forms : (10 forms)
//
// * VPMAXUD xmm, xmm, xmm [AVX]
// * VPMAXUD m128, xmm, xmm [AVX]
// * VPMAXUD ymm, ymm, ymm [AVX2]
// * VPMAXUD m256, ymm, ymm [AVX2]
// * VPMAXUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMAXUD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMAXUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMAXUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMAXUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXUD", 3, Operands { v0, v1, v2 })
    // VPMAXUD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXUD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXUD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXUD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXUD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMAXUD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXUD")
    }
    return p
}

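// Encoding note (a summary of the byte patterns above, per the Intel SDM EVEX
// layout): the hand-rolled register forms emit the four-byte EVEX prefix
// directly: 0x62, then P0 carrying the inverted R/X/B/R' register-extension
// bits, then P1 carrying EVEX.W, the inverted vvvv source register and the pp
// prefix selector, then P2 whose low three bits are the opmask (kcode), whose
// bit 7 is zeroing-masking (zcode), and whose bits 6:5 select the vector
// length, which is why the xmm, ymm and zmm variants differ only in or-ing
// 0x00, 0x20 or 0x40 into P2.
//
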
// VPMAXUQ performs "Maximum of Packed Unsigned Quadword Integers".
//
// Mnemonic : VPMAXUQ
// Supported forms : (6 forms)
//
// * VPMAXUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMAXUQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMAXUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMAXUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMAXUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMAXUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXUQ", 3, Operands { v0, v1, v2 })
    // VPMAXUQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXUQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXUQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3f)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMAXUQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXUQ")
    }
    return p
}

// VPMAXUW performs "Maximum of Packed Unsigned Word Integers".
//
// Mnemonic : VPMAXUW
// Supported forms : (10 forms)
//
// * VPMAXUW xmm, xmm, xmm [AVX]
// * VPMAXUW m128, xmm, xmm [AVX]
// * VPMAXUW ymm, ymm, ymm [AVX2]
// * VPMAXUW m256, ymm, ymm [AVX2]
// * VPMAXUW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXUW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMAXUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMAXUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMAXUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMAXUW", 3, Operands { v0, v1, v2 })
    // VPMAXUW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXUW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3e)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMAXUW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMAXUW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMAXUW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMAXUW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3e)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMAXUW")
    }
    return p
}

// VPMINSB performs "Minimum of Packed Signed Byte Integers".
//
// Mnemonic : VPMINSB
// Supported forms : (10 forms)
//
// * VPMINSB xmm, xmm, xmm [AVX]
// * VPMINSB m128, xmm, xmm [AVX]
// * VPMINSB ymm, ymm, ymm [AVX2]
// * VPMINSB m256, ymm, ymm [AVX2]
// * VPMINSB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMINSB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMINSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMINSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMINSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINSB", 3, Operands { v0, v1, v2 })
    // VPMINSB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x38)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINSB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x38)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINSB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINSB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINSB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x38)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINSB")
    }
    return p
}

// VPMINSD performs "Minimum of Packed Signed Doubleword Integers".
//
// Mnemonic : VPMINSD
// Supported forms : (10 forms)
//
// * VPMINSD xmm, xmm, xmm [AVX]
// * VPMINSD m128, xmm, xmm [AVX]
// * VPMINSD ymm, ymm, ymm [AVX2]
// * VPMINSD m256, ymm, ymm [AVX2]
// * VPMINSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMINSD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMINSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMINSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMINSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINSD", 3, Operands { v0, v1, v2 })
    // VPMINSD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINSD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINSD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINSD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINSD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMINSD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINSD")
    }
    return p
}

// VPMINSQ performs "Minimum of Packed Signed Quadword Integers".
//
// Mnemonic : VPMINSQ
// Supported forms : (6 forms)
//
// * VPMINSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMINSQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMINSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMINSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMINSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINSQ", 3, Operands { v0, v1, v2 })
    // VPMINSQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINSQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINSQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x39)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMINSQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINSQ")
    }
    return p
}

// VPMINSW performs "Minimum of Packed Signed Word Integers".
//
// Mnemonic : VPMINSW
// Supported forms : (10 forms)
//
// * VPMINSW xmm, xmm, xmm [AVX]
// * VPMINSW m128, xmm, xmm [AVX]
// * VPMINSW ymm, ymm, ymm [AVX2]
// * VPMINSW m256, ymm, ymm [AVX2]
// * VPMINSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMINSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMINSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMINSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMINSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINSW", 3, Operands { v0, v1, v2 })
    // VPMINSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xea)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xea)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xea)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xea)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xea)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xea)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINSW")
    }
    return p
}

// VPMINUB performs "Minimum of Packed Unsigned Byte Integers".
//
// Mnemonic : VPMINUB
// Supported forms : (10 forms)
//
// * VPMINUB xmm, xmm, xmm [AVX]
// * VPMINUB m128, xmm, xmm [AVX]
// * VPMINUB ymm, ymm, ymm [AVX2]
// * VPMINUB m256, ymm, ymm [AVX2]
// * VPMINUB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMINUB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMINUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMINUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMINUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINUB", 3, Operands { v0, v1, v2 })
    // VPMINUB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xda)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINUB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xda)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINUB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xda)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINUB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xda)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINUB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xda)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xda)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINUB")
    }
    return p
}

// VPMINUD performs "Minimum of Packed Unsigned Doubleword Integers".
//
// Mnemonic : VPMINUD
// Supported forms : (10 forms)
//
// * VPMINUD xmm, xmm, xmm [AVX]
// * VPMINUD m128, xmm, xmm [AVX]
// * VPMINUD ymm, ymm, ymm [AVX2]
// * VPMINUD m256, ymm, ymm [AVX2]
// * VPMINUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMINUD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMINUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMINUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMINUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINUD", 3, Operands { v0, v1, v2 })
    // VPMINUD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINUD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINUD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINUD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINUD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMINUD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINUD")
    }
    return p
}

// VPMINUQ performs "Minimum of Packed Unsigned Quadword Integers".
//
// Mnemonic : VPMINUQ
// Supported forms : (6 forms)
//
// * VPMINUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMINUQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMINUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMINUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMINUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMINUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINUQ", 3, Operands { v0, v1, v2 })
    // VPMINUQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINUQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINUQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x3b)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMINUQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINUQ")
    }
    return p
}

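// Broadcast note: in the m64bcst/m32bcst memory forms above, the EVEX.b bit
// is taken from the operand via bcode(v[0]), so one encoder serves both a
// full-width load and an embedded broadcast of a single element. Byte- and
// word-element instructions such as VPMINUW below pass a literal 0 instead,
// since EVEX embedded broadcast is defined only for 32- and 64-bit elements.
//
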
// VPMINUW performs "Minimum of Packed Unsigned Word Integers".
//
// Mnemonic : VPMINUW
// Supported forms : (10 forms)
//
// * VPMINUW xmm, xmm, xmm [AVX]
// * VPMINUW m128, xmm, xmm [AVX]
// * VPMINUW ymm, ymm, ymm [AVX2]
// * VPMINUW m256, ymm, ymm [AVX2]
// * VPMINUW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMINUW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMINUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMINUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMINUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMINUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMINUW", 3, Operands { v0, v1, v2 })
    // VPMINUW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINUW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x3a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMINUW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3a)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMINUW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3a)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMINUW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x3a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMINUW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x3a)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMINUW")
    }
    return p
}

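// vpminuwSketch is a hand-written usage sketch, not generated code: it shows
// how the VPMINUW encoder above is typically driven. Operands follow the
// "Supported forms" order (sources first, destination last); YMM0, YMM1 and
// YMM2 are assumed to be the register constants defined elsewhere in this
// package.
func vpminuwSketch(p *Program) {
    // YMM0 = per-lane minimum of the unsigned words in YMM1 and YMM2
    // (the AVX2 register-register form, third entry in the list above).
    p.VPMINUW(YMM2, YMM1, YMM0)
}
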
// VPMOVB2M performs "Move Signs of Packed Byte Integers to Mask Register".
//
// Mnemonic : VPMOVB2M
// Supported forms : (3 forms)
//
// * VPMOVB2M zmm, k [AVX512BW]
// * VPMOVB2M xmm, k [AVX512BW,AVX512VL]
// * VPMOVB2M ymm, k [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVB2M(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVB2M", 2, Operands { v0, v1 })
    // VPMOVB2M zmm, k
    if isZMM(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVB2M xmm, k
    if isEVEXXMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x08)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVB2M ymm, k
    if isEVEXYMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x28)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVB2M")
    }
    return p
}

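// vpmovb2mSketch is a hand-written usage sketch, not generated code: it shows
// the vector-to-mask direction documented above. K1 and ZMM0 are assumed to
// be the mask/vector register constants defined elsewhere in this package.
func vpmovb2mSketch(p *Program) {
    // K1[i] = sign bit of byte lane i of ZMM0 (64 lanes for the zmm form).
    p.VPMOVB2M(ZMM0, K1)
}
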
// VPMOVD2M performs "Move Signs of Packed Doubleword Integers to Mask Register".
//
// Mnemonic : VPMOVD2M
// Supported forms : (3 forms)
//
// * VPMOVD2M zmm, k [AVX512DQ]
// * VPMOVD2M xmm, k [AVX512DQ,AVX512VL]
// * VPMOVD2M ymm, k [AVX512DQ,AVX512VL]
//
func (self *Program) VPMOVD2M(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVD2M", 2, Operands { v0, v1 })
    // VPMOVD2M zmm, k
    if isZMM(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVD2M xmm, k
    if isEVEXXMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x08)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVD2M ymm, k
    if isEVEXYMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x28)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVD2M")
    }
    return p
}

// VPMOVDB performs "Down Convert Packed Doubleword Values to Byte Values with Truncation".
//
// Mnemonic : VPMOVDB
// Supported forms : (6 forms)
//
// * VPMOVDB zmm, xmm{k}{z} [AVX512F]
// * VPMOVDB zmm, m128{k}{z} [AVX512F]
// * VPMOVDB xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVDB xmm, m32{k}{z} [AVX512F,AVX512VL]
// * VPMOVDB ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVDB ymm, m64{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVDB", 2, Operands { v0, v1 })
    // VPMOVDB zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVDB zmm, m128{k}{z}
    if isZMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VPMOVDB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVDB xmm, m32{k}{z}
    if isEVEXXMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPMOVDB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVDB ymm, m64{k}{z}
    if isEVEXYMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVDB")
    }
    return p
}

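// vpmovdbSketch is a hand-written usage sketch, not generated code: it shows
// the truncating down-conversion documented above. ZMM0 and XMM1 are assumed
// to be the register constants defined elsewhere in this package.
func vpmovdbSketch(p *Program) {
    // XMM1 = the low byte of each of the 16 doubleword lanes of ZMM0
    // (plain truncation; see VPMOVSDB below for the saturating variant).
    p.VPMOVDB(ZMM0, XMM1)
}
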
// VPMOVDW performs "Down Convert Packed Doubleword Values to Word Values with Truncation".
//
// Mnemonic : VPMOVDW
// Supported forms : (6 forms)
//
// * VPMOVDW zmm, ymm{k}{z} [AVX512F]
// * VPMOVDW zmm, m256{k}{z} [AVX512F]
// * VPMOVDW xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVDW xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VPMOVDW ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVDW ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVDW", 2, Operands { v0, v1 })
    // VPMOVDW zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVDW zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVDW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVDW xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVDW ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVDW ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVDW")
    }
    return p
}

// VPMOVM2B performs "Expand Bits of Mask Register to Packed Byte Integers".
//
// Mnemonic : VPMOVM2B
// Supported forms : (3 forms)
//
// * VPMOVM2B k, zmm [AVX512BW]
// * VPMOVM2B k, xmm [AVX512BW,AVX512VL]
// * VPMOVM2B k, ymm [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVM2B(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVM2B", 2, Operands { v0, v1 })
    // VPMOVM2B k, zmm
    if isK(v0) && isZMM(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2B k, xmm
    if isK(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x08)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2B k, ymm
    if isK(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x28)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVM2B")
    }
    return p
}

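// vpmovm2bSketch is a hand-written usage sketch, not generated code: it shows
// the mask-to-vector direction documented above, the inverse of VPMOVB2M.
// K1 and ZMM0 are assumed to be the register constants defined elsewhere in
// this package.
func vpmovm2bSketch(p *Program) {
    // Byte lane i of ZMM0 = 0xFF if K1[i] is set, 0x00 otherwise.
    p.VPMOVM2B(K1, ZMM0)
}
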
// VPMOVM2D performs "Expand Bits of Mask Register to Packed Doubleword Integers".
//
// Mnemonic : VPMOVM2D
// Supported forms : (3 forms)
//
// * VPMOVM2D k, zmm [AVX512DQ]
// * VPMOVM2D k, xmm [AVX512DQ,AVX512VL]
// * VPMOVM2D k, ymm [AVX512DQ,AVX512VL]
//
func (self *Program) VPMOVM2D(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVM2D", 2, Operands { v0, v1 })
    // VPMOVM2D k, zmm
    if isK(v0) && isZMM(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x48)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2D k, xmm
    if isK(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x08)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2D k, ymm
    if isK(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7e)
            m.emit(0x28)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVM2D")
    }
    return p
}

// VPMOVM2Q performs "Expand Bits of Mask Register to Packed Quadword Integers".
//
// Mnemonic : VPMOVM2Q
// Supported forms : (3 forms)
//
// * VPMOVM2Q k, zmm [AVX512DQ]
// * VPMOVM2Q k, xmm [AVX512DQ,AVX512VL]
// * VPMOVM2Q k, ymm [AVX512DQ,AVX512VL]
//
func (self *Program) VPMOVM2Q(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVM2Q", 2, Operands { v0, v1 })
    // VPMOVM2Q k, zmm
    if isK(v0) && isZMM(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2Q k, xmm
    if isK(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x08)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2Q k, ymm
    if isK(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x28)
            m.emit(0x38)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVM2Q")
    }
    return p
}

// VPMOVM2W performs "Expand Bits of Mask Register to Packed Word Integers".
//
// Mnemonic : VPMOVM2W
// Supported forms : (3 forms)
//
// * VPMOVM2W k, zmm [AVX512BW]
// * VPMOVM2W k, xmm [AVX512BW,AVX512VL]
// * VPMOVM2W k, ymm [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVM2W(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVM2W", 2, Operands { v0, v1 })
    // VPMOVM2W k, zmm
    if isK(v0) && isZMM(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2W k, xmm
    if isK(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x08)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVM2W k, ymm
    if isK(v0) && isEVEXYMM(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x28)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVM2W")
    }
    return p
}

// VPMOVMSKB performs "Move Byte Mask".
//
// Mnemonic : VPMOVMSKB
// Supported forms : (2 forms)
//
// * VPMOVMSKB xmm, r32 [AVX]
// * VPMOVMSKB ymm, r32 [AVX2]
//
func (self *Program) VPMOVMSKB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVMSKB", 2, Operands { v0, v1 })
    // VPMOVMSKB xmm, r32
    if isXMM(v0) && isReg32(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0xd7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVMSKB ymm, r32
    if isYMM(v0) && isReg32(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0xd7)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVMSKB")
    }
    return p
}

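// vpmovmskbSketch is a hand-written usage sketch, not generated code: unlike
// the EVEX mask moves above, this legacy form writes the byte sign bits into
// a general-purpose register. YMM0 and EAX are assumed to be the register
// constants defined elsewhere in this package.
func vpmovmskbSketch(p *Program) {
    // Bit i of EAX = sign bit of byte lane i of YMM0 (32 bits for the ymm form).
    p.VPMOVMSKB(YMM0, EAX)
}
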
// VPMOVQ2M performs "Move Signs of Packed Quadword Integers to Mask Register".
//
// Mnemonic : VPMOVQ2M
// Supported forms : (3 forms)
//
// * VPMOVQ2M zmm, k [AVX512DQ]
// * VPMOVQ2M xmm, k [AVX512DQ,AVX512VL]
// * VPMOVQ2M ymm, k [AVX512DQ,AVX512VL]
//
func (self *Program) VPMOVQ2M(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVQ2M", 2, Operands { v0, v1 })
    // VPMOVQ2M zmm, k
    if isZMM(v0) && isK(v1) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVQ2M xmm, k
    if isEVEXXMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x08)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVQ2M ymm, k
    if isEVEXYMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x28)
            m.emit(0x39)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVQ2M")
    }
    return p
}

// VPMOVQB performs "Down Convert Packed Quadword Values to Byte Values with Truncation".
//
// Mnemonic : VPMOVQB
// Supported forms : (6 forms)
//
// * VPMOVQB zmm, xmm{k}{z} [AVX512F]
// * VPMOVQB zmm, m64{k}{z} [AVX512F]
// * VPMOVQB xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVQB xmm, m16{k}{z} [AVX512F,AVX512VL]
// * VPMOVQB ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVQB ymm, m32{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVQB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVQB", 2, Operands { v0, v1 })
    // VPMOVQB zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQB zmm, m64{k}{z}
    if isZMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVQB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQB xmm, m16{k}{z}
    if isEVEXXMM(v0) && isM16kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[0]), addr(v[1]), 2)
        })
    }
    // VPMOVQB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQB ymm, m32{k}{z}
    if isEVEXYMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVQB")
    }
    return p
}

// VPMOVQD performs "Down Convert Packed Quadword Values to Doubleword Values with Truncation".
//
// Mnemonic : VPMOVQD
// Supported forms : (6 forms)
//
// * VPMOVQD zmm, ymm{k}{z} [AVX512F]
// * VPMOVQD zmm, m256{k}{z} [AVX512F]
// * VPMOVQD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVQD xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VPMOVQD ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVQD ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVQD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVQD", 2, Operands { v0, v1 })
    // VPMOVQD zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQD zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVQD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQD xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVQD ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQD ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVQD")
    }
    return p
}

// VPMOVQW performs "Down Convert Packed Quadword Values to Word Values with Truncation".
//
// Mnemonic : VPMOVQW
// Supported forms : (6 forms)
//
// * VPMOVQW zmm, xmm{k}{z} [AVX512F]
// * VPMOVQW zmm, m128{k}{z} [AVX512F]
// * VPMOVQW xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVQW xmm, m32{k}{z} [AVX512F,AVX512VL]
// * VPMOVQW ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVQW ymm, m64{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVQW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVQW", 2, Operands { v0, v1 })
    // VPMOVQW zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQW zmm, m128{k}{z}
    if isZMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VPMOVQW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQW xmm, m32{k}{z}
    if isEVEXXMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPMOVQW ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVQW ymm, m64{k}{z}
    if isEVEXYMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVQW")
    }
    return p
}

// VPMOVSDB performs "Down Convert Packed Doubleword Values to Byte Values with Signed Saturation".
//
// Mnemonic : VPMOVSDB
// Supported forms : (6 forms)
//
// * VPMOVSDB zmm, xmm{k}{z} [AVX512F]
// * VPMOVSDB zmm, m128{k}{z} [AVX512F]
// * VPMOVSDB xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSDB xmm, m32{k}{z} [AVX512F,AVX512VL]
// * VPMOVSDB ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSDB ymm, m64{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSDB", 2, Operands { v0, v1 })
    // VPMOVSDB zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSDB zmm, m128{k}{z}
    if isZMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VPMOVSDB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSDB xmm, m32{k}{z}
    if isEVEXXMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPMOVSDB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSDB ymm, m64{k}{z}
    if isEVEXYMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSDB")
    }
    return p
}

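// vpmovsdbSketch is a hand-written usage sketch, not generated code: the same
// down-conversion shape as VPMOVDB above, but each doubleword is clamped to
// [-128, 127] instead of truncated. ZMM0 and XMM1 are assumed to be the
// register constants defined elsewhere in this package.
func vpmovsdbSketch(p *Program) {
    // XMM1 = signed-saturated byte for each of the 16 doubleword lanes of ZMM0.
    p.VPMOVSDB(ZMM0, XMM1)
}
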
// VPMOVSDW performs "Down Convert Packed Doubleword Values to Word Values with Signed Saturation".
//
// Mnemonic : VPMOVSDW
// Supported forms : (6 forms)
//
// * VPMOVSDW zmm, ymm{k}{z} [AVX512F]
// * VPMOVSDW zmm, m256{k}{z} [AVX512F]
// * VPMOVSDW xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSDW xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VPMOVSDW ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSDW ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSDW", 2, Operands { v0, v1 })
    // VPMOVSDW zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSDW zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVSDW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSDW xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVSDW ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSDW ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSDW")
    }
    return p
}

// VPMOVSQB performs "Down Convert Packed Quadword Values to Byte Values with Signed Saturation".
//
// Mnemonic : VPMOVSQB
// Supported forms : (6 forms)
//
// * VPMOVSQB zmm, xmm{k}{z} [AVX512F]
// * VPMOVSQB zmm, m64{k}{z} [AVX512F]
// * VPMOVSQB xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQB xmm, m16{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQB ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQB ymm, m32{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSQB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSQB", 2, Operands { v0, v1 })
    // VPMOVSQB zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQB zmm, m64{k}{z}
    if isZMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVSQB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQB xmm, m16{k}{z}
    if isEVEXXMM(v0) && isM16kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[0]), addr(v[1]), 2)
        })
    }
    // VPMOVSQB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQB ymm, m32{k}{z}
    if isEVEXYMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSQB")
    }
    return p
}

// VPMOVSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Signed Saturation".
//
// Mnemonic : VPMOVSQD
// Supported forms : (6 forms)
//
// * VPMOVSQD zmm, ymm{k}{z} [AVX512F]
// * VPMOVSQD zmm, m256{k}{z} [AVX512F]
// * VPMOVSQD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQD xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQD ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQD ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSQD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSQD", 2, Operands { v0, v1 })
    // VPMOVSQD zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQD zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVSQD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQD xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVSQD ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQD ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSQD")
    }
    return p
}

// VPMOVSQW performs "Down Convert Packed Quadword Values to Word Values with Signed Saturation".
//
// Mnemonic : VPMOVSQW
// Supported forms : (6 forms)
//
// * VPMOVSQW zmm, xmm{k}{z} [AVX512F]
// * VPMOVSQW zmm, m128{k}{z} [AVX512F]
// * VPMOVSQW xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQW xmm, m32{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQW ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSQW ymm, m64{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSQW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSQW", 2, Operands { v0, v1 })
    // VPMOVSQW zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQW zmm, m128{k}{z}
    if isZMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VPMOVSQW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQW xmm, m32{k}{z}
    if isEVEXXMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPMOVSQW ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSQW ymm, m64{k}{z}
    if isEVEXYMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSQW")
    }
    return p
}

// VPMOVSWB performs "Down Convert Packed Word Values to Byte Values with Signed Saturation".
//
// Mnemonic : VPMOVSWB
// Supported forms : (6 forms)
//
// * VPMOVSWB zmm, ymm{k}{z} [AVX512BW]
// * VPMOVSWB zmm, m256{k}{z} [AVX512BW]
// * VPMOVSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVSWB xmm, m64{k}{z} [AVX512BW,AVX512VL]
// * VPMOVSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVSWB ymm, m128{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVSWB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSWB", 2, Operands { v0, v1 })
    // VPMOVSWB zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSWB zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVSWB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSWB xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVSWB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVSWB ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSWB")
    }
    return p
}

// VPMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension".
//
// Mnemonic : VPMOVSXBD
// Supported forms : (10 forms)
//
// * VPMOVSXBD xmm, xmm [AVX]
// * VPMOVSXBD m32, xmm [AVX]
// * VPMOVSXBD xmm, ymm [AVX2]
// * VPMOVSXBD m64, ymm [AVX2]
// * VPMOVSXBD xmm, zmm{k}{z} [AVX512F]
// * VPMOVSXBD m128, zmm{k}{z} [AVX512F]
// * VPMOVSXBD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXBD xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXBD m32, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXBD m64, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSXBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSXBD", 2, Operands { v0, v1 })
    // VPMOVSXBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXBD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBD m64, ymm
    if isM64(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXBD xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBD m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPMOVSXBD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x21)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBD m32, xmm{k}{z}
    if isM32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPMOVSXBD m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x21)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSXBD")
    }
    return p
}

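// vpmovsxbdSketch is a hand-written usage sketch, not generated code: the
// widening counterpart of the down-converts above, sign-extending each byte
// to a doubleword. XMM0 and XMM1 are assumed to be the register constants
// defined elsewhere in this package.
func vpmovsxbdSketch(p *Program) {
    // XMM1 = the low 4 bytes of XMM0, each sign-extended to 32 bits
    // (the plain AVX xmm, xmm form, first entry in the list above).
    p.VPMOVSXBD(XMM0, XMM1)
}
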
// VPMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension".
|
|
//
|
|
// Mnemonic : VPMOVSXBQ
|
|
// Supported forms : (10 forms)
|
|
//
|
|
// * VPMOVSXBQ xmm, xmm [AVX]
|
|
// * VPMOVSXBQ m16, xmm [AVX]
|
|
// * VPMOVSXBQ xmm, ymm [AVX2]
|
|
// * VPMOVSXBQ m32, ymm [AVX2]
|
|
// * VPMOVSXBQ xmm, zmm{k}{z} [AVX512F]
|
|
// * VPMOVSXBQ m64, zmm{k}{z} [AVX512F]
|
|
// * VPMOVSXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
|
|
// * VPMOVSXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
|
|
// * VPMOVSXBQ m16, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXBQ m32, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSXBQ", 2, Operands { v0, v1 })
    // VPMOVSXBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBQ m16, xmm
    if isM16(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXBQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBQ m32, ymm
    if isM32(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXBQ xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBQ m64, zmm{k}{z}
    if isM64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVSXBQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x22)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBQ m16, xmm{k}{z}
    if isM16(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 2)
        })
    }
    // VPMOVSXBQ m32, ymm{k}{z}
    if isM32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x22)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSXBQ")
    }
    return p
}

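// Usage sketch (illustrative only, not part of the generated encoder): with
// the register constants defined elsewhere in this package, the AVX form
// listed above as "VPMOVSXBQ xmm, xmm" would be requested as, e.g.,
//
//     p.VPMOVSXBQ(XMM0, XMM1)   // sign-extend the low 2 bytes of XMM0 into 2 quadwords in XMM1
//
// Note the operand order: the source comes first and the destination second,
// matching the Operands { v0, v1 } layout used throughout this file.
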
// VPMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension".
//
// Mnemonic : VPMOVSXBW
// Supported forms : (10 forms)
//
// * VPMOVSXBW xmm, xmm [AVX]
// * VPMOVSXBW m64, xmm [AVX]
// * VPMOVSXBW xmm, ymm [AVX2]
// * VPMOVSXBW m128, ymm [AVX2]
// * VPMOVSXBW ymm, zmm{k}{z} [AVX512BW]
// * VPMOVSXBW m256, zmm{k}{z} [AVX512BW]
// * VPMOVSXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVSXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVSXBW m64, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVSXBW m128, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVSXBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSXBW", 2, Operands { v0, v1 })
    // VPMOVSXBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBW m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXBW xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBW m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXBW ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBW m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPMOVSXBW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBW xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x20)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXBW m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVSXBW m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x20)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSXBW")
    }
    return p
}

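// Usage sketch (illustrative; the memory-operand constructor shown is an
// assumption about helpers defined elsewhere in this package, not something
// this file declares): the "VPMOVSXBW m64, xmm" form above reads 8 bytes from
// memory and sign-extends them to 8 words, so a call might look like
//
//     p.VPMOVSXBW(Ptr(RSI, 0), XMM2)   // hypothetical Ptr helper: 64-bit load at [RSI]
//
// In that VEX encoding path, mrsd(..., 1) emits the ModRM/SIB/displacement
// bytes with a displacement multiplier of 1, since only the EVEX paths use
// compressed (scaled) 8-bit displacements.
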
// VPMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension".
//
// Mnemonic : VPMOVSXDQ
// Supported forms : (10 forms)
//
// * VPMOVSXDQ xmm, xmm [AVX]
// * VPMOVSXDQ m64, xmm [AVX]
// * VPMOVSXDQ xmm, ymm [AVX2]
// * VPMOVSXDQ m128, ymm [AVX2]
// * VPMOVSXDQ ymm, zmm{k}{z} [AVX512F]
// * VPMOVSXDQ m256, zmm{k}{z} [AVX512F]
// * VPMOVSXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXDQ m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXDQ m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSXDQ", 2, Operands { v0, v1 })
    // VPMOVSXDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXDQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXDQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXDQ m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXDQ ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXDQ m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPMOVSXDQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXDQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXDQ m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVSXDQ m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x25)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSXDQ")
    }
    return p
}

// VPMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension".
//
// Mnemonic : VPMOVSXWD
// Supported forms : (10 forms)
//
// * VPMOVSXWD xmm, xmm [AVX]
// * VPMOVSXWD m64, xmm [AVX]
// * VPMOVSXWD xmm, ymm [AVX2]
// * VPMOVSXWD m128, ymm [AVX2]
// * VPMOVSXWD ymm, zmm{k}{z} [AVX512F]
// * VPMOVSXWD m256, zmm{k}{z} [AVX512F]
// * VPMOVSXWD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXWD xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXWD m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXWD m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSXWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSXWD", 2, Operands { v0, v1 })
    // VPMOVSXWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXWD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWD m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXWD ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWD m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPMOVSXWD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWD m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVSXWD m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x23)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSXWD")
    }
    return p
}

// VPMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension".
//
// Mnemonic : VPMOVSXWQ
// Supported forms : (10 forms)
//
// * VPMOVSXWQ xmm, xmm [AVX]
// * VPMOVSXWQ m32, xmm [AVX]
// * VPMOVSXWQ xmm, ymm [AVX2]
// * VPMOVSXWQ m64, ymm [AVX2]
// * VPMOVSXWQ xmm, zmm{k}{z} [AVX512F]
// * VPMOVSXWQ m128, zmm{k}{z} [AVX512F]
// * VPMOVSXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXWQ m32, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVSXWQ m64, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVSXWQ", 2, Operands { v0, v1 })
    // VPMOVSXWQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWQ m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXWQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWQ m64, ymm
    if isM64(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVSXWQ xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWQ m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPMOVSXWQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x24)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVSXWQ m32, xmm{k}{z}
    if isM32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPMOVSXWQ m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x24)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVSXWQ")
    }
    return p
}

// VPMOVUSDB performs "Down Convert Packed Doubleword Values to Byte Values with Unsigned Saturation".
//
// Mnemonic : VPMOVUSDB
// Supported forms : (6 forms)
//
// * VPMOVUSDB zmm, xmm{k}{z} [AVX512F]
// * VPMOVUSDB zmm, m128{k}{z} [AVX512F]
// * VPMOVUSDB xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSDB xmm, m32{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSDB ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSDB ymm, m64{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVUSDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVUSDB", 2, Operands { v0, v1 })
    // VPMOVUSDB zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSDB zmm, m128{k}{z}
    if isZMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VPMOVUSDB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSDB xmm, m32{k}{z}
    if isEVEXXMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPMOVUSDB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSDB ymm, m64{k}{z}
    if isEVEXYMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVUSDB")
    }
    return p
}

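// Note on the down-convert family (VPMOVUS*, and VPMOVWB below): unlike the
// sign/zero-extend encoders above, here v0 is the wider source register and
// v1 the narrower destination register or memory location, which is why these
// paths put lcode(v[0]) in the ModRM reg field and take the kcode/zcode
// masking bits from v[1]. A minimal sketch, assuming the register constants
// from this package and that an undecorated register satisfies the
// {k}{z}-optional operand check:
//
//     p.VPMOVUSDB(ZMM5, XMM1)   // 16 dwords in ZMM5 -> 16 unsigned-saturated bytes in XMM1
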
// VPMOVUSDW performs "Down Convert Packed Doubleword Values to Word Values with Unsigned Saturation".
//
// Mnemonic : VPMOVUSDW
// Supported forms : (6 forms)
//
// * VPMOVUSDW zmm, ymm{k}{z} [AVX512F]
// * VPMOVUSDW zmm, m256{k}{z} [AVX512F]
// * VPMOVUSDW xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSDW xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSDW ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSDW ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVUSDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVUSDW", 2, Operands { v0, v1 })
    // VPMOVUSDW zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSDW zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVUSDW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSDW xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVUSDW ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x13)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSDW ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x13)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVUSDW")
    }
    return p
}

// VPMOVUSQB performs "Down Convert Packed Quadword Values to Byte Values with Unsigned Saturation".
//
// Mnemonic : VPMOVUSQB
// Supported forms : (6 forms)
//
// * VPMOVUSQB zmm, xmm{k}{z} [AVX512F]
// * VPMOVUSQB zmm, m64{k}{z} [AVX512F]
// * VPMOVUSQB xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQB xmm, m16{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQB ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQB ymm, m32{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVUSQB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVUSQB", 2, Operands { v0, v1 })
    // VPMOVUSQB zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQB zmm, m64{k}{z}
    if isZMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVUSQB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQB xmm, m16{k}{z}
    if isEVEXXMM(v0) && isM16kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[0]), addr(v[1]), 2)
        })
    }
    // VPMOVUSQB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQB ymm, m32{k}{z}
    if isEVEXYMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVUSQB")
    }
    return p
}

// VPMOVUSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Unsigned Saturation".
//
// Mnemonic : VPMOVUSQD
// Supported forms : (6 forms)
//
// * VPMOVUSQD zmm, ymm{k}{z} [AVX512F]
// * VPMOVUSQD zmm, m256{k}{z} [AVX512F]
// * VPMOVUSQD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQD xmm, m64{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQD ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQD ymm, m128{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVUSQD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVUSQD", 2, Operands { v0, v1 })
    // VPMOVUSQD zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQD zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x15)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVUSQD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQD xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x15)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVUSQD ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQD ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x15)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVUSQD")
    }
    return p
}

// VPMOVUSQW performs "Down Convert Packed Quadword Values to Word Values with Unsigned Saturation".
//
// Mnemonic : VPMOVUSQW
// Supported forms : (6 forms)
//
// * VPMOVUSQW zmm, xmm{k}{z} [AVX512F]
// * VPMOVUSQW zmm, m128{k}{z} [AVX512F]
// * VPMOVUSQW xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQW xmm, m32{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQW ymm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVUSQW ymm, m64{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVUSQW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVUSQW", 2, Operands { v0, v1 })
    // VPMOVUSQW zmm, xmm{k}{z}
    if isZMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQW zmm, m128{k}{z}
    if isZMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x14)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    // VPMOVUSQW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQW xmm, m32{k}{z}
    if isEVEXXMM(v0) && isM32kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x14)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPMOVUSQW ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSQW ymm, m64{k}{z}
    if isEVEXYMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x14)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVUSQW")
    }
    return p
}

// VPMOVUSWB performs "Down Convert Packed Word Values to Byte Values with Unsigned Saturation".
//
// Mnemonic : VPMOVUSWB
// Supported forms : (6 forms)
//
// * VPMOVUSWB zmm, ymm{k}{z} [AVX512BW]
// * VPMOVUSWB zmm, m256{k}{z} [AVX512BW]
// * VPMOVUSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVUSWB xmm, m64{k}{z} [AVX512BW,AVX512VL]
// * VPMOVUSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVUSWB ymm, m128{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVUSWB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVUSWB", 2, Operands { v0, v1 })
    // VPMOVUSWB zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSWB zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVUSWB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSWB xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVUSWB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVUSWB ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVUSWB")
    }
    return p
}

// VPMOVW2M performs "Move Signs of Packed Word Integers to Mask Register".
//
// Mnemonic : VPMOVW2M
// Supported forms : (3 forms)
//
// * VPMOVW2M zmm, k [AVX512BW]
// * VPMOVW2M xmm, k [AVX512BW,AVX512VL]
// * VPMOVW2M ymm, k [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVW2M(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVW2M", 2, Operands { v0, v1 })
    // VPMOVW2M zmm, k
    if isZMM(v0) && isK(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x48)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVW2M xmm, k
    if isEVEXXMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x08)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVW2M ymm, k
    if isEVEXYMM(v0) && isK(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfe)
            m.emit(0x28)
            m.emit(0x29)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVW2M")
    }
    return p
}

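// Usage sketch (illustrative): VPMOVW2M collects the sign bit of each word
// element into a mask register, so with the K-register constants assumed to
// be defined elsewhere in this package a call could read
//
//     p.VPMOVW2M(YMM4, K1)   // K1[i] = bit 15 of word element i of YMM4, for i = 0..15
//
// Only the 0x48/0x08/0x28 vector-length byte differs between the three
// encodings above; per the supported-forms list there is no masked or memory
// form of this instruction.
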
// VPMOVWB performs "Down Convert Packed Word Values to Byte Values with Truncation".
//
// Mnemonic : VPMOVWB
// Supported forms : (6 forms)
//
// * VPMOVWB zmm, ymm{k}{z} [AVX512BW]
// * VPMOVWB zmm, m256{k}{z} [AVX512BW]
// * VPMOVWB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVWB xmm, m64{k}{z} [AVX512BW,AVX512VL]
// * VPMOVWB ymm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVWB ymm, m128{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVWB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVWB", 2, Operands { v0, v1 })
    // VPMOVWB zmm, ymm{k}{z}
    if isZMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVWB zmm, m256{k}{z}
    if isZMM(v0) && isM256kz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[0]), addr(v[1]), 32)
        })
    }
    // VPMOVWB xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVWB xmm, m64{k}{z}
    if isEVEXXMM(v0) && isM64kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPMOVWB ymm, xmm{k}{z}
    if isEVEXYMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // VPMOVWB ymm, m128{k}{z}
    if isEVEXYMM(v0) && isM128kz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[0]), addr(v[1]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVWB")
    }
    return p
}

// VPMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension".
//
// Mnemonic : VPMOVZXBD
// Supported forms : (10 forms)
//
// * VPMOVZXBD xmm, xmm [AVX]
// * VPMOVZXBD m32, xmm [AVX]
// * VPMOVZXBD xmm, ymm [AVX2]
// * VPMOVZXBD m64, ymm [AVX2]
// * VPMOVZXBD xmm, zmm{k}{z} [AVX512F]
// * VPMOVZXBD m128, zmm{k}{z} [AVX512F]
// * VPMOVZXBD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXBD xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXBD m32, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXBD m64, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVZXBD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVZXBD", 2, Operands { v0, v1 })
    // VPMOVZXBD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBD m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXBD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBD m64, ymm
    if isM64(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXBD xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBD m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPMOVZXBD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBD m32, xmm{k}{z}
    if isM32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPMOVZXBD m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x31)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVZXBD")
    }
    return p
}

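// Usage sketch (illustrative): the VPMOVZX* encoders mirror their VPMOVSX*
// counterparts earlier in this file, differing only in the opcode byte (for
// example 0x31 here, and 0x32 for VPMOVZXBQ below, versus 0x22 for
// VPMOVSXBQ); the high bits of each widened element are cleared instead of
// sign-filled. For instance, with the register constants from this package:
//
//     p.VPMOVZXBD(XMM3, YMM7)   // zero-extend the low 8 bytes of XMM3 into 8 dwords in YMM7
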
// VPMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension".
//
// Mnemonic : VPMOVZXBQ
// Supported forms : (10 forms)
//
// * VPMOVZXBQ xmm, xmm [AVX]
// * VPMOVZXBQ m16, xmm [AVX]
// * VPMOVZXBQ xmm, ymm [AVX2]
// * VPMOVZXBQ m32, ymm [AVX2]
// * VPMOVZXBQ xmm, zmm{k}{z} [AVX512F]
// * VPMOVZXBQ m64, zmm{k}{z} [AVX512F]
// * VPMOVZXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXBQ m16, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXBQ m32, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVZXBQ", 2, Operands { v0, v1 })
    // VPMOVZXBQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBQ m16, xmm
    if isM16(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXBQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBQ m32, ymm
    if isM32(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXBQ xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBQ m64, zmm{k}{z}
    if isM64(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVZXBQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBQ m16, xmm{k}{z}
    if isM16(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 2)
        })
    }
    // VPMOVZXBQ m32, ymm{k}{z}
    if isM32(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVZXBQ")
    }
    return p
}

// VPMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension".
//
// Mnemonic : VPMOVZXBW
// Supported forms : (10 forms)
//
// * VPMOVZXBW xmm, xmm [AVX]
// * VPMOVZXBW m64, xmm [AVX]
// * VPMOVZXBW xmm, ymm [AVX2]
// * VPMOVZXBW m128, ymm [AVX2]
// * VPMOVZXBW ymm, zmm{k}{z} [AVX512BW]
// * VPMOVZXBW m256, zmm{k}{z} [AVX512BW]
// * VPMOVZXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVZXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVZXBW m64, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMOVZXBW m128, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMOVZXBW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVZXBW", 2, Operands { v0, v1 })
    // VPMOVZXBW xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBW m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXBW xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBW m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXBW ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBW m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPMOVZXBW xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBW xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXBW m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVZXBW m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x30)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVZXBW")
    }
    return p
}

// VPMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension".
//
// Mnemonic : VPMOVZXDQ
// Supported forms : (10 forms)
//
// * VPMOVZXDQ xmm, xmm [AVX]
// * VPMOVZXDQ m64, xmm [AVX]
// * VPMOVZXDQ xmm, ymm [AVX2]
// * VPMOVZXDQ m128, ymm [AVX2]
// * VPMOVZXDQ ymm, zmm{k}{z} [AVX512F]
// * VPMOVZXDQ m256, zmm{k}{z} [AVX512F]
// * VPMOVZXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXDQ m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXDQ m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVZXDQ", 2, Operands { v0, v1 })
    // VPMOVZXDQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXDQ m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXDQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXDQ m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXDQ ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXDQ m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPMOVZXDQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXDQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x35)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXDQ m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVZXDQ m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x35)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVZXDQ")
    }
    return p
}

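// Usage sketch for the generated matcher above (an illustration, not part of
// the mkasm_amd64.py output): operands are passed source-first, destination
// last, and the operand types alone select the VEX or EVEX form. XMM0/YMM1
// are assumed here to be this package's register constants.
//
//     p.VPMOVZXDQ(XMM0, XMM1)   // VPMOVZXDQ xmm, xmm -> VEX form, requires AVX
//     p.VPMOVZXDQ(XMM0, YMM1)   // VPMOVZXDQ xmm, ymm -> VEX form, requires AVX2
//
// Any operand pair that matches none of the listed forms leaves p.len == 0,
// and the method panics with "invalid operands for VPMOVZXDQ".
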
// VPMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension".
//
// Mnemonic : VPMOVZXWD
// Supported forms : (10 forms)
//
// * VPMOVZXWD xmm, xmm [AVX]
// * VPMOVZXWD m64, xmm [AVX]
// * VPMOVZXWD xmm, ymm [AVX2]
// * VPMOVZXWD m128, ymm [AVX2]
// * VPMOVZXWD ymm, zmm{k}{z} [AVX512F]
// * VPMOVZXWD m256, zmm{k}{z} [AVX512F]
// * VPMOVZXWD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXWD xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXWD m64, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXWD m128, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVZXWD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVZXWD", 2, Operands { v0, v1 })
    // VPMOVZXWD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWD m64, xmm
    if isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXWD xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWD m128, ymm
    if isM128(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXWD ymm, zmm{k}{z}
    if isEVEXYMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWD m256, zmm{k}{z}
    if isM256(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VPMOVZXWD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWD xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWD m64, xmm{k}{z}
    if isM64(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VPMOVZXWD m128, ymm{k}{z}
    if isM128(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVZXWD")
    }
    return p
}

// VPMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension".
//
// Mnemonic : VPMOVZXWQ
// Supported forms : (10 forms)
//
// * VPMOVZXWQ xmm, xmm [AVX]
// * VPMOVZXWQ m32, xmm [AVX]
// * VPMOVZXWQ xmm, ymm [AVX2]
// * VPMOVZXWQ m64, ymm [AVX2]
// * VPMOVZXWQ xmm, zmm{k}{z} [AVX512F]
// * VPMOVZXWQ m128, zmm{k}{z} [AVX512F]
// * VPMOVZXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXWQ m32, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMOVZXWQ m64, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPMOVZXWQ", 2, Operands { v0, v1 })
    // VPMOVZXWQ xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWQ m32, xmm
    if isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXWQ xmm, ymm
    if isXMM(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWQ m64, ymm
    if isM64(v0) && isYMM(v1) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPMOVZXWQ xmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWQ m128, zmm{k}{z}
    if isM128(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VPMOVZXWQ xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWQ xmm, ymm{k}{z}
    if isEVEXXMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x34)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPMOVZXWQ m32, xmm{k}{z}
    if isM32(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VPMOVZXWQ m64, ymm{k}{z}
    if isM64(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
            m.emit(0x34)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMOVZXWQ")
    }
    return p
}

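// A hedged note on the memory widths above: each VPMOVZX* form reads exactly
// destination-width / extension-ratio bytes, which is why the word-to-quadword
// variant takes m32 for an xmm destination but m128 for a zmm one. The last
// argument of m.mrsd() in the EVEX forms mirrors that operand size and is, on
// this reading, the EVEX disp8*N compression scale:
//
//     // EVEX forms of VPMOVZXWQ, scale taken from the mrsd calls above:
//     //   m32  -> xmm{k}{z} : m.mrsd(..., 4)
//     //   m64  -> ymm{k}{z} : m.mrsd(..., 8)
//     //   m128 -> zmm{k}{z} : m.mrsd(..., 16)
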
// VPMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result".
//
// Mnemonic : VPMULDQ
// Supported forms : (10 forms)
//
// * VPMULDQ xmm, xmm, xmm [AVX]
// * VPMULDQ m128, xmm, xmm [AVX]
// * VPMULDQ ymm, ymm, ymm [AVX2]
// * VPMULDQ m256, ymm, ymm [AVX2]
// * VPMULDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMULDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMULDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMULDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMULDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMULDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMULDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULDQ", 3, Operands { v0, v1, v2 })
    // VPMULDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x28)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x28)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULDQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x28)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULDQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x28)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULDQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x28)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMULDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x28)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULDQ")
    }
    return p
}

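// Usage sketch (an illustration only, not generated code): VPMULDQ is a
// three-operand method in AT&T order, sources first and destination last, and
// the EVEX memory forms thread bcode(v[0]) through m.evex() so an m64-broadcast
// operand sets the EVEX broadcast bit. Register constants below are assumed
// from this package.
//
//     p.VPMULDQ(XMM0, XMM1, XMM2)   // VPMULDQ xmm, xmm, xmm -> VEX, requires AVX
//     p.VPMULDQ(ZMM0, ZMM1, ZMM2)   // VPMULDQ zmm, zmm, zmm -> EVEX, requires AVX512F
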
// VPMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale".
//
// Mnemonic : VPMULHRSW
// Supported forms : (10 forms)
//
// * VPMULHRSW xmm, xmm, xmm [AVX]
// * VPMULHRSW m128, xmm, xmm [AVX]
// * VPMULHRSW ymm, ymm, ymm [AVX2]
// * VPMULHRSW m256, ymm, ymm [AVX2]
// * VPMULHRSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMULHRSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMULHRSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHRSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHRSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHRSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMULHRSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULHRSW", 3, Operands { v0, v1, v2 })
    // VPMULHRSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHRSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULHRSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHRSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULHRSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHRSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x0b)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULHRSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHRSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x0b)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULHRSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHRSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x0b)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULHRSW")
    }
    return p
}

// VPMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result".
//
// Mnemonic : VPMULHUW
// Supported forms : (10 forms)
//
// * VPMULHUW xmm, xmm, xmm [AVX]
// * VPMULHUW m128, xmm, xmm [AVX]
// * VPMULHUW ymm, ymm, ymm [AVX2]
// * VPMULHUW m256, ymm, ymm [AVX2]
// * VPMULHUW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMULHUW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMULHUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMULHUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULHUW", 3, Operands { v0, v1, v2 })
    // VPMULHUW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHUW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe4)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULHUW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHUW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe4)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULHUW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHUW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe4)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULHUW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHUW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe4)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULHUW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHUW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe4)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULHUW")
    }
    return p
}

// VPMULHW performs "Multiply Packed Signed Word Integers and Store High Result".
//
// Mnemonic : VPMULHW
// Supported forms : (10 forms)
//
// * VPMULHW xmm, xmm, xmm [AVX]
// * VPMULHW m128, xmm, xmm [AVX]
// * VPMULHW ymm, ymm, ymm [AVX2]
// * VPMULHW m256, ymm, ymm [AVX2]
// * VPMULHW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMULHW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMULHW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMULHW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMULHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULHW", 3, Operands { v0, v1, v2 })
    // VPMULHW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULHW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULHW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe5)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULHW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe5)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULHW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULHW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe5)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULHW")
    }
    return p
}

// VPMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result".
//
// Mnemonic : VPMULLD
// Supported forms : (10 forms)
//
// * VPMULLD xmm, xmm, xmm [AVX]
// * VPMULLD m128, xmm, xmm [AVX]
// * VPMULLD ymm, ymm, ymm [AVX2]
// * VPMULLD m256, ymm, ymm [AVX2]
// * VPMULLD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMULLD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMULLD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMULLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMULLD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMULLD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMULLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULLD", 3, Operands { v0, v1, v2 })
    // VPMULLD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULLD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULLD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULLD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULLD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMULLD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULLD")
    }
    return p
}

// VPMULLQ performs "Multiply Packed Signed Quadword Integers and Store Low Result".
//
// Mnemonic : VPMULLQ
// Supported forms : (6 forms)
//
// * VPMULLQ m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VPMULLQ zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VPMULLQ m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VPMULLQ xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VPMULLQ m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VPMULLQ ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VPMULLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULLQ", 3, Operands { v0, v1, v2 })
    // VPMULLQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULLQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULLQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x40)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMULLQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x40)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULLQ")
    }
    return p
}

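// A hedged contrast with VPMULLD above (illustration, not generated code):
// VPMULLQ is generated with EVEX forms only, so there is no AVX/AVX2 fallback
// and every call needs AVX512DQ, plus AVX512VL at 128/256-bit width. Register
// constants are assumed from this package.
//
//     p.VPMULLQ(ZMM0, ZMM1, ZMM2)   // zmm, zmm, zmm -> self.require(ISA_AVX512DQ)
//     p.VPMULLQ(XMM0, XMM1, XMM2)   // xmm, xmm, xmm -> ISA_AVX512VL | ISA_AVX512DQ
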
// VPMULLW performs "Multiply Packed Signed Word Integers and Store Low Result".
//
// Mnemonic : VPMULLW
// Supported forms : (10 forms)
//
// * VPMULLW xmm, xmm, xmm [AVX]
// * VPMULLW m128, xmm, xmm [AVX]
// * VPMULLW ymm, ymm, ymm [AVX2]
// * VPMULLW m256, ymm, ymm [AVX2]
// * VPMULLW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPMULLW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPMULLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPMULLW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPMULLW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPMULLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULLW", 3, Operands { v0, v1, v2 })
    // VPMULLW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULLW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd5)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULLW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd5)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULLW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd5)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULLW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd5)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULLW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd5)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULLW")
    }
    return p
}

// VPMULTISHIFTQB performs "Select Packed Unaligned Bytes from Quadword Sources".
//
// Mnemonic : VPMULTISHIFTQB
// Supported forms : (6 forms)
//
// * VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPMULTISHIFTQB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
// * VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPMULTISHIFTQB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
// * VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z} [AVX512VBMI]
// * VPMULTISHIFTQB zmm, zmm, zmm{k}{z} [AVX512VBMI]
//
func (self *Program) VPMULTISHIFTQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULTISHIFTQB", 3, Operands { v0, v1, v2 })
    // VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VBMI | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x83)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULTISHIFTQB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VBMI | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VBMI | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x83)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMULTISHIFTQB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VBMI | ISA_AVX512VL)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x83)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULTISHIFTQB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512VBMI)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x83)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULTISHIFTQB")
    }
    return p
}

// VPMULUDQ performs "Multiply Packed Unsigned Doubleword Integers".
//
// Mnemonic : VPMULUDQ
// Supported forms : (10 forms)
//
// * VPMULUDQ xmm, xmm, xmm [AVX]
// * VPMULUDQ m128, xmm, xmm [AVX]
// * VPMULUDQ ymm, ymm, ymm [AVX2]
// * VPMULUDQ m256, ymm, ymm [AVX2]
// * VPMULUDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPMULUDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPMULUDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMULUDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPMULUDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPMULUDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPMULUDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPMULUDQ", 3, Operands { v0, v1, v2 })
    // VPMULUDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULUDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf4)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULUDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULUDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf4)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPMULUDQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xf4)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPMULUDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULUDQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xf4)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPMULUDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPMULUDQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xf4)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPMULUDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf4)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPMULUDQ")
    }
    return p
}

// VPOPCNTD performs "Packed Population Count for Doubleword Integers".
//
// Mnemonic : VPOPCNTD
// Supported forms : (2 forms)
//
// * VPOPCNTD m512/m32bcst, zmm{k}{z} [AVX512VPOPCNTDQ]
// * VPOPCNTD zmm, zmm{k}{z} [AVX512VPOPCNTDQ]
//
func (self *Program) VPOPCNTD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPOPCNTD", 2, Operands { v0, v1 })
    // VPOPCNTD m512/m32bcst, zmm{k}{z}
    if isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512VPOPCNTDQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPOPCNTD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512VPOPCNTDQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPOPCNTD")
    }
    return p
}

// VPOPCNTQ performs "Packed Population Count for Quadword Integers".
//
// Mnemonic : VPOPCNTQ
// Supported forms : (2 forms)
//
// * VPOPCNTQ m512/m64bcst, zmm{k}{z} [AVX512VPOPCNTDQ]
// * VPOPCNTQ zmm, zmm{k}{z} [AVX512VPOPCNTDQ]
//
func (self *Program) VPOPCNTQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPOPCNTQ", 2, Operands { v0, v1 })
    // VPOPCNTQ m512/m64bcst, zmm{k}{z}
    if isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512VPOPCNTDQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x55)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VPOPCNTQ zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512VPOPCNTDQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x55)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPOPCNTQ")
    }
    return p
}

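// Hedged usage sketch (illustration, not generated code): unlike most methods
// in this file, the two population-count methods above are generated with
// 512-bit forms only, both gated on AVX512VPOPCNTDQ; narrower operands match
// no form and hit the panic. ZMM names are assumed package constants.
//
//     p.VPOPCNTD(ZMM0, ZMM1)   // zmm, zmm{k}{z} form
//     p.VPOPCNTQ(ZMM0, ZMM1)   // zmm, zmm{k}{z} form
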
// VPOR performs "Packed Bitwise Logical OR".
//
// Mnemonic : VPOR
// Supported forms : (4 forms)
//
// * VPOR xmm, xmm, xmm [AVX]
// * VPOR m128, xmm, xmm [AVX]
// * VPOR ymm, ymm, ymm [AVX2]
// * VPOR m256, ymm, ymm [AVX2]
//
func (self *Program) VPOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPOR", 3, Operands { v0, v1, v2 })
    // VPOR xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPOR m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPOR ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPOR m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPOR")
    }
    return p
}

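// Hedged usage sketch (illustration, not generated code): VPOR above carries
// only the legacy VEX forms, so masked, broadcast, or 512-bit ORs go through
// the element-typed VPORD/VPORQ methods that follow. Register constants are
// assumed from this package.
//
//     p.VPOR(YMM0, YMM1, YMM2)    // VEX.256, requires AVX2
//     p.VPORD(ZMM0, ZMM1, ZMM2)   // EVEX.512, requires AVX512F
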
// VPORD performs "Bitwise Logical OR of Packed Doubleword Integers".
//
// Mnemonic : VPORD
// Supported forms : (6 forms)
//
// * VPORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPORD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPORD", 3, Operands { v0, v1, v2 })
    // VPORD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPORD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPORD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPORD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPORD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPORD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPORD")
    }
    return p
}

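// Usage sketch for VPORD (illustrative only, assumptions as above). The
// result is bit-for-bit the same as VPOR; the doubleword granularity only
// matters for the EVEX per-lane masking and m32bcst broadcast forms:
//
//     p.VPORD(ZMM1, ZMM2, ZMM3)    // zmm3 = zmm2 | zmm1
//
// VPORQ below behaves identically apart from its 64-bit mask/broadcast
// granularity.
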
// VPORQ performs "Bitwise Logical OR of Packed Quadword Integers".
//
// Mnemonic : VPORQ
// Supported forms : (6 forms)
//
// * VPORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPORQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPORQ", 3, Operands { v0, v1, v2 })
    // VPORQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPORQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPORQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPORQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPORQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xeb)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPORQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xeb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPORQ")
    }
    return p
}

// VPPERM performs "Packed Permute Bytes".
//
// Mnemonic : VPPERM
// Supported forms : (3 forms)
//
// * VPPERM xmm, xmm, xmm, xmm [XOP]
// * VPPERM m128, xmm, xmm, xmm [XOP]
// * VPPERM xmm, m128, xmm, xmm [XOP]
//
func (self *Program) VPPERM(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPPERM", 4, Operands { v0, v1, v2, v3 })
    // VPPERM xmm, xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[2]) << 3))
            m.emit(0xa3)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.emit(hlcode(v[0]) << 4)
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[2]) << 3))
            m.emit(0xa3)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VPPERM m128, xmm, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2]))
            m.emit(0xa3)
            m.mrsd(lcode(v[3]), addr(v[0]), 1)
            m.emit(hlcode(v[1]) << 4)
        })
    }
    // VPPERM xmm, m128, xmm, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xa3)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.emit(hlcode(v[0]) << 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPPERM")
    }
    return p
}

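// Usage sketch for VPPERM (illustrative only, assumptions as above). With
// the reversed operand order used throughout this file, the per-byte
// selector comes first and the destination last:
//
//     p.VPPERM(XMM0, XMM1, XMM2, XMM3)    // xmm3[i] = byte selected by xmm0[i]
//                                         // from the 32 bytes of xmm2 and xmm1
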
// VPROLD performs "Rotate Packed Doubleword Left".
//
// Mnemonic : VPROLD
// Supported forms : (6 forms)
//
// * VPROLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPROLD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPROLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPROLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPROLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROLD", 3, Operands { v0, v1, v2 })
    // VPROLD imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(1, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLD imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(1, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLD imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(1, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROLD")
    }
    return p
}

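// Usage sketch for VPROLD (illustrative only, assumptions as above; the
// immediate is passed as a plain Go integer, and VPROLQ below is the same
// operation at 64-bit lane width):
//
//     p.VPROLD(3, ZMM1, ZMM2)    // zmm2[i] = rotate_left(zmm1[i], 3) per 32-bit lane
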
// VPROLQ performs "Rotate Packed Quadword Left".
//
// Mnemonic : VPROLQ
// Supported forms : (6 forms)
//
// * VPROLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPROLQ imm8, zmm, zmm{k}{z} [AVX512F]
// * VPROLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPROLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPROLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROLQ", 3, Operands { v0, v1, v2 })
    // VPROLQ imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(1, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLQ imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLQ imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(1, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLQ imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(1, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLQ imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROLQ imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xc8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROLQ")
    }
    return p
}

// VPROLVD performs "Variable Rotate Packed Doubleword Left".
//
// Mnemonic : VPROLVD
// Supported forms : (6 forms)
//
// * VPROLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPROLVD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPROLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPROLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPROLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROLVD", 3, Operands { v0, v1, v2 })
    // VPROLVD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPROLVD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROLVD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPROLVD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROLVD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPROLVD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROLVD")
    }
    return p
}

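// Usage sketch for VPROLVD (illustrative only, assumptions as above). Unlike
// VPROLD, the rotate counts come from a vector, one count per lane; VPROLVQ
// below is the 64-bit counterpart:
//
//     p.VPROLVD(ZMM0, ZMM1, ZMM2)    // zmm2[i] = rotate_left(zmm1[i], zmm0[i] mod 32)
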
// VPROLVQ performs "Variable Rotate Packed Quadword Left".
//
// Mnemonic : VPROLVQ
// Supported forms : (6 forms)
//
// * VPROLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPROLVQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPROLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPROLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPROLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPROLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROLVQ", 3, Operands { v0, v1, v2 })
    // VPROLVQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPROLVQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROLVQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPROLVQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROLVQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPROLVQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROLVQ")
    }
    return p
}

// VPRORD performs "Rotate Packed Doubleword Right".
//
// Mnemonic : VPRORD
// Supported forms : (6 forms)
//
// * VPRORD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPRORD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPRORD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPRORD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPRORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPRORD", 3, Operands { v0, v1, v2 })
    // VPRORD imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(0, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORD imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(0, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORD imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(0, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPRORD")
    }
    return p
}

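// Usage sketch for VPRORD (illustrative only, assumptions as above); the
// right-rotate counterpart of VPROLD, with VPRORQ below at 64-bit width:
//
//     p.VPRORD(3, ZMM1, ZMM2)    // zmm2[i] = rotate_right(zmm1[i], 3) per 32-bit lane
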
// VPRORQ performs "Rotate Packed Quadword Right".
//
// Mnemonic : VPRORQ
// Supported forms : (6 forms)
//
// * VPRORQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPRORQ imm8, zmm, zmm{k}{z} [AVX512F]
// * VPRORQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPRORQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPRORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPRORQ", 3, Operands { v0, v1, v2 })
    // VPRORQ imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(0, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORQ imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORQ imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(0, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORQ imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(0, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORQ imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPRORQ imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xc0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPRORQ")
    }
    return p
}

// VPRORVD performs "Variable Rotate Packed Doubleword Right".
//
// Mnemonic : VPRORVD
// Supported forms : (6 forms)
//
// * VPRORVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPRORVD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPRORVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPRORVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPRORVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPRORVD", 3, Operands { v0, v1, v2 })
    // VPRORVD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPRORVD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPRORVD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPRORVD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPRORVD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPRORVD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPRORVD")
    }
    return p
}

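// Usage sketch for VPRORVD (illustrative only, assumptions as above); the
// right-rotate counterpart of VPROLVD, with VPRORVQ below at 64-bit width:
//
//     p.VPRORVD(ZMM0, ZMM1, ZMM2)    // zmm2[i] = rotate_right(zmm1[i], zmm0[i] mod 32)
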
// VPRORVQ performs "Variable Rotate Packed Quadword Right".
//
// Mnemonic : VPRORVQ
// Supported forms : (6 forms)
//
// * VPRORVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPRORVQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPRORVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPRORVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPRORVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPRORVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPRORVQ", 3, Operands { v0, v1, v2 })
    // VPRORVQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPRORVQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPRORVQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPRORVQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPRORVQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPRORVQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPRORVQ")
    }
    return p
}

// VPROTB performs "Packed Rotate Bytes".
//
// Mnemonic : VPROTB
// Supported forms : (5 forms)
//
// * VPROTB imm8, xmm, xmm [XOP]
// * VPROTB xmm, xmm, xmm [XOP]
// * VPROTB m128, xmm, xmm [XOP]
// * VPROTB imm8, m128, xmm [XOP]
// * VPROTB xmm, m128, xmm [XOP]
//
func (self *Program) VPROTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROTB", 3, Operands { v0, v1, v2 })
    // VPROTB imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78)
            m.emit(0xc0)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x90)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROTB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x90)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPROTB imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
            m.emit(0xc0)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTB xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x90)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROTB")
    }
    return p
}

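// Usage sketch for VPROTB (illustrative only, assumptions as above). In the
// register-count form each byte of the source rotates by the signed count in
// the matching byte of the first operand (negative counts rotate right);
// VPROTD, VPROTQ and VPROTW below do the same at their lane widths:
//
//     p.VPROTB(XMM0, XMM1, XMM2)    // xmm2[i] = rotate(xmm1[i], int8(xmm0[i]))
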
// VPROTD performs "Packed Rotate Doublewords".
//
// Mnemonic : VPROTD
// Supported forms : (5 forms)
//
// * VPROTD imm8, xmm, xmm [XOP]
// * VPROTD xmm, xmm, xmm [XOP]
// * VPROTD m128, xmm, xmm [XOP]
// * VPROTD imm8, m128, xmm [XOP]
// * VPROTD xmm, m128, xmm [XOP]
//
func (self *Program) VPROTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROTD", 3, Operands { v0, v1, v2 })
    // VPROTD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78)
            m.emit(0xc2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x92)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROTD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x92)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPROTD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
            m.emit(0xc2)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTD xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x92)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROTD")
    }
    return p
}

// VPROTQ performs "Packed Rotate Quadwords".
//
// Mnemonic : VPROTQ
// Supported forms : (5 forms)
//
// * VPROTQ imm8, xmm, xmm [XOP]
// * VPROTQ xmm, xmm, xmm [XOP]
// * VPROTQ m128, xmm, xmm [XOP]
// * VPROTQ imm8, m128, xmm [XOP]
// * VPROTQ xmm, m128, xmm [XOP]
//
func (self *Program) VPROTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROTQ", 3, Operands { v0, v1, v2 })
    // VPROTQ imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78)
            m.emit(0xc3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x93)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROTQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x93)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPROTQ imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
            m.emit(0xc3)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTQ xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x93)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROTQ")
    }
    return p
}

// VPROTW performs "Packed Rotate Words".
//
// Mnemonic : VPROTW
// Supported forms : (5 forms)
//
// * VPROTW imm8, xmm, xmm [XOP]
// * VPROTW xmm, xmm, xmm [XOP]
// * VPROTW m128, xmm, xmm [XOP]
// * VPROTW imm8, m128, xmm [XOP]
// * VPROTW xmm, m128, xmm [XOP]
//
func (self *Program) VPROTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPROTW", 3, Operands { v0, v1, v2 })
    // VPROTW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78)
            m.emit(0xc1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x91)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x91)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPROTW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x91)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPROTW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
            m.emit(0xc1)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPROTW xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x91)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPROTW")
    }
    return p
}

// VPSADBW performs "Compute Sum of Absolute Differences".
//
// Mnemonic : VPSADBW
// Supported forms : (10 forms)
//
// * VPSADBW xmm, xmm, xmm [AVX]
// * VPSADBW m128, xmm, xmm [AVX]
// * VPSADBW ymm, ymm, ymm [AVX2]
// * VPSADBW m256, ymm, ymm [AVX2]
// * VPSADBW zmm, zmm, zmm [AVX512BW]
// * VPSADBW m512, zmm, zmm [AVX512BW]
// * VPSADBW xmm, xmm, xmm [AVX512BW,AVX512VL]
// * VPSADBW m128, xmm, xmm [AVX512BW,AVX512VL]
// * VPSADBW ymm, ymm, ymm [AVX512BW,AVX512VL]
// * VPSADBW m256, ymm, ymm [AVX512BW,AVX512VL]
//
func (self *Program) VPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSADBW", 3, Operands { v0, v1, v2 })
    // VPSADBW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSADBW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSADBW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSADBW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSADBW zmm, zmm, zmm
    if isZMM(v0) && isZMM(v1) && isZMM(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSADBW m512, zmm, zmm
    if isM512(v0) && isZMM(v1) && isZMM(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSADBW xmm, xmm, xmm
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSADBW m128, xmm, xmm
    if isM128(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSADBW ymm, ymm, ymm
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x20)
            m.emit(0xf6)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSADBW m256, ymm, ymm
    if isM256(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
            m.emit(0xf6)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSADBW")
    }
    return p
}

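// Usage sketch for VPSADBW (illustrative only, assumptions as above). Each
// group of eight byte-wise absolute differences is summed into the low word
// of the corresponding 64-bit lane of the destination:
//
//     p.VPSADBW(XMM1, XMM2, XMM3)    // xmm3.u64[j] = sum over k of |xmm2.u8[8j+k] - xmm1.u8[8j+k]|
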
// VPSCATTERDD performs "Scatter Packed Doubleword Values with Signed Doubleword Indices".
//
// Mnemonic : VPSCATTERDD
// Supported forms : (3 forms)
//
// * VPSCATTERDD zmm, vm32z{k} [AVX512F]
// * VPSCATTERDD xmm, vm32x{k} [AVX512F,AVX512VL]
// * VPSCATTERDD ymm, vm32y{k} [AVX512F,AVX512VL]
//
func (self *Program) VPSCATTERDD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPSCATTERDD", 2, Operands { v0, v1 })
    // VPSCATTERDD zmm, vm32z{k}
    if isZMM(v0) && isVMZk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa0)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPSCATTERDD xmm, vm32x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa0)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPSCATTERDD ymm, vm32y{k}
    if isEVEXYMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa0)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSCATTERDD")
    }
    return p
}

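// Usage sketch for VPSCATTERDD (illustrative only). The second operand is a
// VSIB vector-indexed memory operand (vm32*) that must carry a mask
// register; vm below is a placeholder for such an operand built with this
// package's memory-operand helpers, whose construction is not shown here:
//
//     p.VPSCATTERDD(ZMM1, vm)    // for each lane i whose mask bit is set:
//                                // mem[base + idx[i]*scale] = zmm1.u32[i]
//
// The DQ/QD/QQ variants that follow differ only in index width (d = 32-bit,
// q = 64-bit indices) and element width.
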
// VPSCATTERDQ performs "Scatter Packed Quadword Values with Signed Doubleword Indices".
//
// Mnemonic : VPSCATTERDQ
// Supported forms : (3 forms)
//
// * VPSCATTERDQ zmm, vm32y{k} [AVX512F]
// * VPSCATTERDQ xmm, vm32x{k} [AVX512F,AVX512VL]
// * VPSCATTERDQ ymm, vm32x{k} [AVX512F,AVX512VL]
//
func (self *Program) VPSCATTERDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPSCATTERDQ", 2, Operands { v0, v1 })
    // VPSCATTERDQ zmm, vm32y{k}
    if isZMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa0)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPSCATTERDQ xmm, vm32x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa0)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPSCATTERDQ ymm, vm32x{k}
    if isEVEXYMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa0)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSCATTERDQ")
    }
    return p
}

// VPSCATTERQD performs "Scatter Packed Doubleword Values with Signed Quadword Indices".
//
// Mnemonic : VPSCATTERQD
// Supported forms : (3 forms)
//
// * VPSCATTERQD ymm, vm64z{k} [AVX512F]
// * VPSCATTERQD xmm, vm64x{k} [AVX512F,AVX512VL]
// * VPSCATTERQD xmm, vm64y{k} [AVX512F,AVX512VL]
//
func (self *Program) VPSCATTERQD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPSCATTERQD", 2, Operands { v0, v1 })
    // VPSCATTERQD ymm, vm64z{k}
    if isEVEXYMM(v0) && isVMZk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa1)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPSCATTERQD xmm, vm64x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa1)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VPSCATTERQD xmm, vm64y{k}
    if isEVEXXMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa1)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSCATTERQD")
    }
    return p
}

// VPSCATTERQQ performs "Scatter Packed Quadword Values with Signed Quadword Indices".
//
// Mnemonic : VPSCATTERQQ
// Supported forms : (3 forms)
//
// * VPSCATTERQQ zmm, vm64z{k} [AVX512F]
// * VPSCATTERQQ xmm, vm64x{k} [AVX512F,AVX512VL]
// * VPSCATTERQQ ymm, vm64y{k} [AVX512F,AVX512VL]
//
func (self *Program) VPSCATTERQQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPSCATTERQQ", 2, Operands { v0, v1 })
    // VPSCATTERQQ zmm, vm64z{k}
    if isZMM(v0) && isVMZk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa1)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPSCATTERQQ xmm, vm64x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa1)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VPSCATTERQQ ymm, vm64y{k}
    if isEVEXYMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa1)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSCATTERQQ")
    }
    return p
}


// VPSHAB performs "Packed Shift Arithmetic Bytes".
//
// Mnemonic : VPSHAB
// Supported forms : (3 forms)
//
// * VPSHAB xmm, xmm, xmm [XOP]
// * VPSHAB m128, xmm, xmm [XOP]
// * VPSHAB xmm, m128, xmm [XOP]
//
func (self *Program) VPSHAB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHAB", 3, Operands { v0, v1, v2 })
    // VPSHAB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x98)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHAB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHAB xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x98)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHAB")
    }
    return p
}
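
// Hand-written sketch (not generated): VPSHAB does per-byte arithmetic
// shifts with signed per-element counts, where negative counts shift right
// and replicate the sign bit. The two register encoders above emit the same
// instruction with the XOP.W bit flipped (0x78 vs. 0xf8 in the third byte),
// swapping which source register travels in vvvv and which in ModRM.rm.
// Assuming this package's XMM register constants, a register-form call is:
//
//     p.VPSHAB(XMM0, XMM1, XMM2)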

// VPSHAD performs "Packed Shift Arithmetic Doublewords".
//
// Mnemonic : VPSHAD
// Supported forms : (3 forms)
//
// * VPSHAD xmm, xmm, xmm [XOP]
// * VPSHAD m128, xmm, xmm [XOP]
// * VPSHAD xmm, m128, xmm [XOP]
//
func (self *Program) VPSHAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHAD", 3, Operands { v0, v1, v2 })
    // VPSHAD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x9a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHAD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHAD xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x9a)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHAD")
    }
    return p
}

// VPSHAQ performs "Packed Shift Arithmetic Quadwords".
//
// Mnemonic : VPSHAQ
// Supported forms : (3 forms)
//
// * VPSHAQ xmm, xmm, xmm [XOP]
// * VPSHAQ m128, xmm, xmm [XOP]
// * VPSHAQ xmm, m128, xmm [XOP]
//
func (self *Program) VPSHAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHAQ", 3, Operands { v0, v1, v2 })
    // VPSHAQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x9b)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHAQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x9b)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHAQ xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x9b)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHAQ")
    }
    return p
}

// VPSHAW performs "Packed Shift Arithmetic Words".
//
// Mnemonic : VPSHAW
// Supported forms : (3 forms)
//
// * VPSHAW xmm, xmm, xmm [XOP]
// * VPSHAW m128, xmm, xmm [XOP]
// * VPSHAW xmm, m128, xmm [XOP]
//
func (self *Program) VPSHAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHAW", 3, Operands { v0, v1, v2 })
    // VPSHAW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x99)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHAW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x99)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHAW xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x99)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHAW")
    }
    return p
}

// VPSHLB performs "Packed Shift Logical Bytes".
//
// Mnemonic : VPSHLB
// Supported forms : (3 forms)
//
// * VPSHLB xmm, xmm, xmm [XOP]
// * VPSHLB m128, xmm, xmm [XOP]
// * VPSHLB xmm, m128, xmm [XOP]
//
func (self *Program) VPSHLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHLB", 3, Operands { v0, v1, v2 })
    // VPSHLB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x94)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x94)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHLB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x94)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHLB xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x94)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHLB")
    }
    return p
}

// VPSHLD performs "Packed Shift Logical Doublewords".
//
// Mnemonic : VPSHLD
// Supported forms : (3 forms)
//
// * VPSHLD xmm, xmm, xmm [XOP]
// * VPSHLD m128, xmm, xmm [XOP]
// * VPSHLD xmm, m128, xmm [XOP]
//
func (self *Program) VPSHLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHLD", 3, Operands { v0, v1, v2 })
    // VPSHLD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x96)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x96)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHLD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x96)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHLD xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x96)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHLD")
    }
    return p
}

// VPSHLQ performs "Packed Shift Logical Quadwords".
//
// Mnemonic : VPSHLQ
// Supported forms : (3 forms)
//
// * VPSHLQ xmm, xmm, xmm [XOP]
// * VPSHLQ m128, xmm, xmm [XOP]
// * VPSHLQ xmm, m128, xmm [XOP]
//
func (self *Program) VPSHLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHLQ", 3, Operands { v0, v1, v2 })
    // VPSHLQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x97)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHLQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHLQ xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x97)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHLQ")
    }
    return p
}

// VPSHLW performs "Packed Shift Logical Words".
//
// Mnemonic : VPSHLW
// Supported forms : (3 forms)
//
// * VPSHLW xmm, xmm, xmm [XOP]
// * VPSHLW m128, xmm, xmm [XOP]
// * VPSHLW xmm, m128, xmm [XOP]
//
func (self *Program) VPSHLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHLW", 3, Operands { v0, v1, v2 })
    // VPSHLW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x78 ^ (hlcode(v[0]) << 3))
            m.emit(0x95)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x8f)
            m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
            m.emit(0x95)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHLW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x95)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHLW xmm, m128, xmm
    if isXMM(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_XOP)
        p.domain = DomainAMDSpecific
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
            m.emit(0x95)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHLW")
    }
    return p
}
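
// Hand-written family note: VPSHLB/VPSHLW/VPSHLD/VPSHLQ (opcodes 0x94-0x97)
// are the logical counterparts of VPSHAB/VPSHAW/VPSHAD/VPSHAQ (0x98-0x9b);
// the XOP encodings are otherwise identical, but right shifts fill with
// zeroes instead of copies of the sign bit.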

// VPSHUFB performs "Packed Shuffle Bytes".
//
// Mnemonic : VPSHUFB
// Supported forms : (10 forms)
//
// * VPSHUFB xmm, xmm, xmm [AVX]
// * VPSHUFB m128, xmm, xmm [AVX]
// * VPSHUFB ymm, ymm, ymm [AVX2]
// * VPSHUFB m256, ymm, ymm [AVX2]
// * VPSHUFB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSHUFB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSHUFB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSHUFB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHUFB", 3, Operands { v0, v1, v2 })
    // VPSHUFB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHUFB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHUFB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHUFB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSHUFB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHUFB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSHUFB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHUFB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSHUFB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x00)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSHUFB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x00)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHUFB")
    }
    return p
}
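
// Hand-written sketch (not generated): with the AVX2 register form, a byte
// shuffle of ymm1 under the control bytes of ymm0 could be emitted as
// (register constants assumed from this package):
//
//     p.VPSHUFB(YMM0, YMM1, YMM2)
//
// Each control byte indexes within its own 128-bit lane, and a control byte
// with bit 7 set zeroes the destination byte; the shuffle never crosses
// lanes.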

// VPSHUFD performs "Shuffle Packed Doublewords".
//
// Mnemonic : VPSHUFD
// Supported forms : (10 forms)
//
// * VPSHUFD imm8, xmm, xmm [AVX]
// * VPSHUFD imm8, m128, xmm [AVX]
// * VPSHUFD imm8, ymm, ymm [AVX2]
// * VPSHUFD imm8, m256, ymm [AVX2]
// * VPSHUFD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPSHUFD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSHUFD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSHUFD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSHUFD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSHUFD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHUFD", 3, Operands { v0, v1, v2 })
    // VPSHUFD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[1], 0)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[1], 0)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHUFD")
    }
    return p
}
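
// Hand-written sketch (not generated): each 2-bit field of the imm8 picks
// the source doubleword for one destination slot, so 0x1b (0b00_01_10_11)
// reverses the four doublewords of a lane. Assuming this package's register
// constants:
//
//     p.VPSHUFD(0x1b, XMM1, XMM2) // xmm2 = xmm1 with its doublewords reversed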

// VPSHUFHW performs "Shuffle Packed High Words".
//
// Mnemonic : VPSHUFHW
// Supported forms : (10 forms)
//
// * VPSHUFHW imm8, xmm, xmm [AVX]
// * VPSHUFHW imm8, m128, xmm [AVX]
// * VPSHUFHW imm8, ymm, ymm [AVX2]
// * VPSHUFHW imm8, m256, ymm [AVX2]
// * VPSHUFHW imm8, zmm, zmm{k}{z} [AVX512BW]
// * VPSHUFHW imm8, m512, zmm{k}{z} [AVX512BW]
// * VPSHUFHW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFHW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFHW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFHW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHUFHW", 3, Operands { v0, v1, v2 })
    // VPSHUFHW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[1], 0)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[2]), v[1], 0)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(6, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, m512, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, m128, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFHW imm8, m256, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHUFHW")
    }
    return p
}
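
// Hand-written semantics note: VPSHUFHW permutes only words 4-7 of each
// 128-bit lane under the imm8 control; words 0-3 are copied through
// unchanged.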

// VPSHUFLW performs "Shuffle Packed Low Words".
//
// Mnemonic : VPSHUFLW
// Supported forms : (10 forms)
//
// * VPSHUFLW imm8, xmm, xmm [AVX]
// * VPSHUFLW imm8, m128, xmm [AVX]
// * VPSHUFLW imm8, ymm, ymm [AVX2]
// * VPSHUFLW imm8, m256, ymm [AVX2]
// * VPSHUFLW imm8, zmm, zmm{k}{z} [AVX512BW]
// * VPSHUFLW imm8, m512, zmm{k}{z} [AVX512BW]
// * VPSHUFLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSHUFLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSHUFLW", 3, Operands { v0, v1, v2 })
    // VPSHUFLW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[1], 0)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), v[1], 0)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(7, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, m512, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7f)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x70)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, m128, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSHUFLW imm8, m256, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x07, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x70)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSHUFLW")
    }
    return p
}
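
// Hand-written semantics note: VPSHUFLW is the mirror of VPSHUFHW; it
// permutes words 0-3 of each 128-bit lane under the imm8 control and copies
// words 4-7 through unchanged.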

// VPSIGNB performs "Packed Sign of Byte Integers".
//
// Mnemonic : VPSIGNB
// Supported forms : (4 forms)
//
// * VPSIGNB xmm, xmm, xmm [AVX]
// * VPSIGNB m128, xmm, xmm [AVX]
// * VPSIGNB ymm, ymm, ymm [AVX2]
// * VPSIGNB m256, ymm, ymm [AVX2]
//
func (self *Program) VPSIGNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSIGNB", 3, Operands { v0, v1, v2 })
    // VPSIGNB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSIGNB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x08)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSIGNB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSIGNB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x08)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSIGNB")
    }
    return p
}
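
// Hand-written sketch (not generated): VPSIGNB copies, negates, or zeroes
// each byte of the data source according to the sign of the corresponding
// byte in the other source (positive: copy, negative: negate, zero: zero).
// Assuming this package's register constants and the reversed operand order
// used throughout this file, a register-form call is:
//
//     p.VPSIGNB(XMM0, XMM1, XMM2) // xmm2 = bytes of xmm1 signed by xmm0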

// VPSIGND performs "Packed Sign of Doubleword Integers".
//
// Mnemonic : VPSIGND
// Supported forms : (4 forms)
//
// * VPSIGND xmm, xmm, xmm [AVX]
// * VPSIGND m128, xmm, xmm [AVX]
// * VPSIGND ymm, ymm, ymm [AVX2]
// * VPSIGND m256, ymm, ymm [AVX2]
//
func (self *Program) VPSIGND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSIGND", 3, Operands { v0, v1, v2 })
    // VPSIGND xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSIGND m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSIGND ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSIGND m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x0a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSIGND")
    }
    return p
}

// VPSIGNW performs "Packed Sign of Word Integers".
//
// Mnemonic : VPSIGNW
// Supported forms : (4 forms)
//
// * VPSIGNW xmm, xmm, xmm [AVX]
// * VPSIGNW m128, xmm, xmm [AVX]
// * VPSIGNW ymm, ymm, ymm [AVX2]
// * VPSIGNW m256, ymm, ymm [AVX2]
//
func (self *Program) VPSIGNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSIGNW", 3, Operands { v0, v1, v2 })
    // VPSIGNW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSIGNW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSIGNW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSIGNW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSIGNW")
    }
    return p
}
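
// Hand-written family note: VPSIGNB/VPSIGNW/VPSIGND exist only in AVX/AVX2
// form; the PSIGN family was never promoted to EVEX, which is why no
// AVX-512 masked variants appear here.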

// VPSLLD performs "Shift Packed Doubleword Data Left Logical".
//
// Mnemonic : VPSLLD
// Supported forms : (18 forms)
//
// * VPSLLD imm8, xmm, xmm [AVX]
// * VPSLLD xmm, xmm, xmm [AVX]
// * VPSLLD m128, xmm, xmm [AVX]
// * VPSLLD imm8, ymm, ymm [AVX2]
// * VPSLLD xmm, ymm, ymm [AVX2]
// * VPSLLD m128, ymm, ymm [AVX2]
// * VPSLLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPSLLD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSLLD xmm, zmm, zmm{k}{z} [AVX512F]
// * VPSLLD m128, zmm, zmm{k}{z} [AVX512F]
// * VPSLLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSLLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLD", 3, Operands { v0, v1, v2 })
    // VPSLLD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLD m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLD imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(6, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLD m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLD imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(6, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(6, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLD m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLD xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLD m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLD")
    }
    return p
}
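
// Hand-written sketch (not generated): the imm8 forms shift every doubleword
// by the same count, and counts of 32 or more zero the elements. Assuming
// this package's register constants, doubling each doubleword of xmm0 is:
//
//     p.VPSLLD(1, XMM0, XMM1) // xmm1 = xmm0 with each dword shifted left by 1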

// VPSLLDQ performs "Shift Packed Double Quadword Left Logical".
//
// Mnemonic : VPSLLDQ
// Supported forms : (8 forms)
//
// * VPSLLDQ imm8, xmm, xmm [AVX]
// * VPSLLDQ imm8, ymm, ymm [AVX2]
// * VPSLLDQ imm8, zmm, zmm [AVX512BW]
// * VPSLLDQ imm8, m512, zmm [AVX512BW]
// * VPSLLDQ imm8, xmm, xmm [AVX512BW,AVX512VL]
// * VPSLLDQ imm8, m128, xmm [AVX512BW,AVX512VL]
// * VPSLLDQ imm8, ymm, ymm [AVX512BW,AVX512VL]
// * VPSLLDQ imm8, m256, ymm [AVX512BW,AVX512VL]
//
func (self *Program) VPSLLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLDQ", 3, Operands { v0, v1, v2 })
    // VPSLLDQ imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, zmm, zmm
    if isImm8(v0) && isZMM(v1) && isZMM(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40)
            m.emit(0x73)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, m512, zmm
    if isImm8(v0) && isM512(v1) && isZMM(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x73)
            m.mrsd(7, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, xmm, xmm
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
            m.emit(0x73)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x73)
            m.mrsd(7, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, ymm, ymm
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20)
            m.emit(0x73)
            m.emit(0xf8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLDQ imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x73)
            m.mrsd(7, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLDQ")
    }
    return p
}
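
// Hand-written sketch (not generated): VPSLLDQ shifts by imm8 *bytes*, not
// bits, and operates on each 128-bit lane independently in the ymm and zmm
// forms. Assuming this package's register constants:
//
//     p.VPSLLDQ(4, XMM0, XMM1) // xmm1 = xmm0 shifted left by 4 bytes (32 bits)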

// VPSLLQ performs "Shift Packed Quadword Data Left Logical".
//
// Mnemonic : VPSLLQ
// Supported forms : (18 forms)
//
// * VPSLLQ imm8, xmm, xmm [AVX]
// * VPSLLQ xmm, xmm, xmm [AVX]
// * VPSLLQ m128, xmm, xmm [AVX]
// * VPSLLQ imm8, ymm, ymm [AVX2]
// * VPSLLQ xmm, ymm, ymm [AVX2]
// * VPSLLQ m128, ymm, ymm [AVX2]
// * VPSLLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPSLLQ imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSLLQ xmm, zmm, zmm{k}{z} [AVX512F]
// * VPSLLQ m128, zmm, zmm{k}{z} [AVX512F]
// * VPSLLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSLLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLQ", 3, Operands { v0, v1, v2 })
    // VPSLLQ imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLQ imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLQ m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf3)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLQ imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x73)
            m.mrsd(6, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLQ m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLQ imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x73)
            m.mrsd(6, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x73)
            m.mrsd(6, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLQ m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLQ imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x73)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLQ xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLQ m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLQ")
    }
    return p
}
|
|
|
|
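// Usage sketch (editorial note, not produced by mkasm_amd64.py): a form is
// selected purely by operand kind, so the imm8 and xmm shift-count variants
// are reached through the same method. Register constants such as XMM1 and
// bare Go integers for imm8 are assumed to satisfy this package's operand
// matchers; prog is assumed to be a *Program built elsewhere:
//
//     prog.VPSLLQ(4, XMM1, XMM2)      // imm8 count -> 0x73 /6 encoding
//     prog.VPSLLQ(XMM0, XMM1, XMM2)   // xmm count  -> 0xf3 encoding
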
// VPSLLVD performs "Variable Shift Packed Doubleword Data Left Logical".
//
// Mnemonic : VPSLLVD
// Supported forms : (10 forms)
//
// * VPSLLVD xmm, xmm, xmm [AVX2]
// * VPSLLVD m128, xmm, xmm [AVX2]
// * VPSLLVD ymm, ymm, ymm [AVX2]
// * VPSLLVD m256, ymm, ymm [AVX2]
// * VPSLLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSLLVD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSLLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSLLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLVD", 3, Operands { v0, v1, v2 })
    // VPSLLVD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLVD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLVD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSLLVD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLVD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSLLVD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLVD")
    }
    return p
}

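// Usage sketch (editorial): VPSLLVD shifts every doubleword lane by its own
// count, so there is no imm8 form; the count vector is an ordinary register
// or memory operand. Whether an AVX2 VEX form or an AVX-512 EVEX form is
// registered depends only on the operand classes passed in:
//
//     prog.VPSLLVD(XMM3, XMM1, XMM0)   // per-lane counts in XMM3 (AVX2 form)
//     prog.VPSLLVD(ZMM3, ZMM1, ZMM0)   // zmm operands select the AVX512F form
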
// VPSLLVQ performs "Variable Shift Packed Quadword Data Left Logical".
//
// Mnemonic : VPSLLVQ
// Supported forms : (10 forms)
//
// * VPSLLVQ xmm, xmm, xmm [AVX2]
// * VPSLLVQ m128, xmm, xmm [AVX2]
// * VPSLLVQ ymm, ymm, ymm [AVX2]
// * VPSLLVQ m256, ymm, ymm [AVX2]
// * VPSLLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSLLVQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSLLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSLLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSLLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSLLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLVQ", 3, Operands { v0, v1, v2 })
    // VPSLLVQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLVQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLVQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSLLVQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLVQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x47)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSLLVQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x47)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLVQ")
    }
    return p
}

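// Note (editorial): the only encoding difference from VPSLLVD is the VEX/EVEX
// W bit selecting quadword lanes, visible above as the 0xf9/0xfd prefix bytes
// and the 0x81/0x85 vex3/evex table arguments where VPSLLVD used 0x79/0x7d
// and 0x01/0x05; the opcode byte 0x47 is shared. A minimal sketch:
//
//     prog.VPSLLVQ(YMM3, YMM1, YMM0)   // per-qword counts in YMM3 (AVX2 form)
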
// VPSLLVW performs "Variable Shift Packed Word Data Left Logical".
//
// Mnemonic : VPSLLVW
// Supported forms : (6 forms)
//
// * VPSLLVW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSLLVW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSLLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSLLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLVW", 3, Operands { v0, v1, v2 })
    // VPSLLVW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSLLVW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLVW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x12)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLVW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x12)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLVW")
    }
    return p
}

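// Note (editorial): word-granularity variable shifts have no AVX2 encoding,
// so every VPSLLVW form requires AVX512BW (plus AVX512VL for the 128/256-bit
// forms); operand combinations that match none of the guards above fall
// through to the "invalid operands" panic when the instruction is built:
//
//     prog.VPSLLVW(ZMM2, ZMM1, ZMM0)   // sketch: AVX512BW zmm form
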
// VPSLLW performs "Shift Packed Word Data Left Logical".
//
// Mnemonic : VPSLLW
// Supported forms : (18 forms)
//
// * VPSLLW imm8, xmm, xmm [AVX]
// * VPSLLW xmm, xmm, xmm [AVX]
// * VPSLLW m128, xmm, xmm [AVX]
// * VPSLLW imm8, ymm, ymm [AVX2]
// * VPSLLW xmm, ymm, ymm [AVX2]
// * VPSLLW m128, ymm, ymm [AVX2]
// * VPSLLW imm8, zmm, zmm{k}{z} [AVX512BW]
// * VPSLLW xmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSLLW m128, zmm, zmm{k}{z} [AVX512BW]
// * VPSLLW imm8, m512, zmm{k}{z} [AVX512BW]
// * VPSLLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSLLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSLLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSLLW", 3, Operands { v0, v1, v2 })
    // VPSLLW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf1)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLW imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLW m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf1)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSLLW imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLW m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLW imm8, m512, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(6, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLW imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x71)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSLLW m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSLLW imm8, m128, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(6, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSLLW imm8, m256, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(6, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSLLW")
    }
    return p
}

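// Usage sketch (editorial): besides the register forms, VPSLLW has EVEX forms
// whose shifted data comes from memory (imm8, m128/m256/m512, reg{k}{z});
// those encoders pass the literal 6 to m.mrsd, i.e. the /6 opcode extension
// in the ModRM reg field that selects left-shift within opcode 0x71. The
// memory-operand constructor named below is an assumption about this
// package's API:
//
//     prog.VPSLLW(3, Ptr(RAX, 0), XMM0)   // hypothetical Ptr() memory operand
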
// VPSRAD performs "Shift Packed Doubleword Data Right Arithmetic".
//
// Mnemonic : VPSRAD
// Supported forms : (18 forms)
//
// * VPSRAD imm8, xmm, xmm [AVX]
// * VPSRAD xmm, xmm, xmm [AVX]
// * VPSRAD m128, xmm, xmm [AVX]
// * VPSRAD imm8, ymm, ymm [AVX2]
// * VPSRAD xmm, ymm, ymm [AVX2]
// * VPSRAD m128, ymm, ymm [AVX2]
// * VPSRAD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPSRAD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSRAD xmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRAD m128, zmm, zmm{k}{z} [AVX512F]
// * VPSRAD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRAD", 3, Operands { v0, v1, v2 })
    // VPSRAD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRAD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAD m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRAD imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(4, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAD m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAD imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(4, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(4, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAD m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAD xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAD m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRAD")
    }
    return p
}

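// Note (editorial): the immediate shift group shares opcode bytes 0x71/0x72/
// 0x73 and selects the operation through the ModRM reg field, which is why
// the memory-form encoders above pass the literal 4 to m.mrsd (/4 = shift
// right arithmetic) where VPSLLW passed 6 and VPSRLD below passes 2:
//
//     prog.VPSRAD(31, XMM1, XMM0)   // sketch: fills each dword with its sign
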
// VPSRAQ performs "Shift Packed Quadword Data Right Arithmetic".
//
// Mnemonic : VPSRAQ
// Supported forms : (12 forms)
//
// * VPSRAQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPSRAQ imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSRAQ xmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRAQ m128, zmm, zmm{k}{z} [AVX512F]
// * VPSRAQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRAQ", 3, Operands { v0, v1, v2 })
    // VPSRAQ imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(4, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAQ imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAQ xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAQ m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAQ imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(4, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAQ imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(4, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAQ imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAQ m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAQ imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAQ xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAQ m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRAQ")
    }
    return p
}

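// Note (editorial): packed-quadword arithmetic right shift first appeared in
// AVX-512, so VPSRAQ lists no plain AVX/AVX2 forms; all twelve forms above
// are EVEX-encoded with W=1 (hence the 0x85 argument to m.evex and the 0xfd
// byte in the hand-rolled prefixes). A minimal sketch:
//
//     prog.VPSRAQ(63, ZMM1, ZMM0)   // imm8 count, zmm form (AVX512F)
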
// VPSRAVD performs "Variable Shift Packed Doubleword Data Right Arithmetic".
//
// Mnemonic : VPSRAVD
// Supported forms : (10 forms)
//
// * VPSRAVD xmm, xmm, xmm [AVX2]
// * VPSRAVD m128, xmm, xmm [AVX2]
// * VPSRAVD ymm, ymm, ymm [AVX2]
// * VPSRAVD m256, ymm, ymm [AVX2]
// * VPSRAVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSRAVD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRAVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRAVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRAVD", 3, Operands { v0, v1, v2 })
    // VPSRAVD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRAVD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRAVD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSRAVD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAVD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSRAVD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRAVD")
    }
    return p
}

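// Note (editorial): the m32bcst forms above forward bcode(v[0]) into m.evex,
// so a broadcast memory operand sets the EVEX.b bit and replicates a single
// dword count across all lanes; with plain registers the call is simply:
//
//     prog.VPSRAVD(ZMM3, ZMM1, ZMM0)   // sketch: per-dword arithmetic shifts
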
// VPSRAVQ performs "Variable Shift Packed Quadword Data Right Arithmetic".
//
// Mnemonic : VPSRAVQ
// Supported forms : (6 forms)
//
// * VPSRAVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSRAVQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRAVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRAVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRAVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRAVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRAVQ", 3, Operands { v0, v1, v2 })
    // VPSRAVQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSRAVQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAVQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x46)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSRAVQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x46)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRAVQ")
    }
    return p
}

// VPSRAVW performs "Variable Shift Packed Word Data Right Arithmetic".
//
// Mnemonic : VPSRAVW
// Supported forms : (6 forms)
//
// * VPSRAVW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSRAVW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSRAVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRAVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRAVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSRAVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSRAVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRAVW", 3, Operands { v0, v1, v2 })
    // VPSRAVW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSRAVW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAVW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x11)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAVW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x11)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRAVW")
    }
    return p
}

// VPSRAW performs "Shift Packed Word Data Right Arithmetic".
|
|
//
|
|
// Mnemonic : VPSRAW
|
|
// Supported forms : (18 forms)
|
|
//
|
|
// * VPSRAW imm8, xmm, xmm [AVX]
|
|
// * VPSRAW xmm, xmm, xmm [AVX]
|
|
// * VPSRAW m128, xmm, xmm [AVX]
|
|
// * VPSRAW imm8, ymm, ymm [AVX2]
|
|
// * VPSRAW xmm, ymm, ymm [AVX2]
|
|
// * VPSRAW m128, ymm, ymm [AVX2]
|
|
// * VPSRAW imm8, zmm, zmm{k}{z} [AVX512BW]
|
|
// * VPSRAW xmm, zmm, zmm{k}{z} [AVX512BW]
|
|
// * VPSRAW m128, zmm, zmm{k}{z} [AVX512BW]
|
|
// * VPSRAW imm8, m512, zmm{k}{z} [AVX512BW]
|
|
// * VPSRAW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
|
|
// * VPSRAW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
|
|
//
|
|
func (self *Program) VPSRAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("VPSRAW", 3, Operands { v0, v1, v2 })
|
|
// VPSRAW imm8, xmm, xmm
|
|
if isImm8(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(1, 0, v[1], hlcode(v[2]))
|
|
m.emit(0x71)
|
|
m.emit(0xe0 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPSRAW xmm, xmm, xmm
|
|
if isXMM(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
|
|
m.emit(0xe1)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPSRAW m128, xmm, xmm
|
|
if isM128(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xe1)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VPSRAW imm8, ymm, ymm
|
|
if isImm8(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX2)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(5, 0, v[1], hlcode(v[2]))
|
|
m.emit(0x71)
|
|
m.emit(0xe0 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPSRAW xmm, ymm, ymm
|
|
if isXMM(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX2)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
|
|
m.emit(0xe1)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPSRAW m128, ymm, ymm
|
|
if isM128(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX2)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
|
|
m.emit(0xe1)
|
|
m.mrsd(lcode(v[2]), addr(v[0]), 1)
|
|
})
|
|
}
|
|
// VPSRAW imm8, zmm, zmm{k}{z}
|
|
if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ (ehcode(v[1]) << 5))
|
|
m.emit(0x7d ^ (hlcode(v[2]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0x71)
|
|
m.emit(0xe0 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VPSRAW xmm, zmm, zmm{k}{z}
|
|
if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0x62)
|
|
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
|
|
m.emit(0x7d ^ (hlcode(v[1]) << 3))
|
|
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
|
|
m.emit(0xe1)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
|
|
})
|
|
}
|
|
// VPSRAW m128, zmm, zmm{k}{z}
|
|
if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
|
|
self.require(ISA_AVX512BW)
|
|
p.domain = DomainAVX
|
|
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAW imm8, m512, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(4, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAW imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x71)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAW imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x71)
            m.emit(0xe0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAW xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRAW m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRAW imm8, m128, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(4, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRAW imm8, m256, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(4, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRAW")
    }
    return p
}
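
// Usage sketch (illustrative only, not generated output; assumes this
// package's XMM register constants and that a plain Go int satisfies isImm8):
//
//    p.VPSRAW(3, XMM1, XMM2) // arithmetic-shift each word of xmm1 right by 3 into xmm2
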
// VPSRLD performs "Shift Packed Doubleword Data Right Logical".
//
// Mnemonic : VPSRLD
// Supported forms : (18 forms)
//
// * VPSRLD imm8, xmm, xmm [AVX]
// * VPSRLD xmm, xmm, xmm [AVX]
// * VPSRLD m128, xmm, xmm [AVX]
// * VPSRLD imm8, ymm, ymm [AVX2]
// * VPSRLD xmm, ymm, ymm [AVX2]
// * VPSRLD m128, ymm, ymm [AVX2]
// * VPSRLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VPSRLD imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSRLD xmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRLD m128, zmm, zmm{k}{z} [AVX512F]
// * VPSRLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLD", 3, Operands { v0, v1, v2 })
    // VPSRLD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLD m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd2)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLD imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(2, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLD m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLD imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(2, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x72)
            m.mrsd(2, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLD m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x72)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLD xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd2)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLD m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd2)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLD")
    }
    return p
}
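
// Usage sketch (illustrative only; XMM register constants and untyped-int
// imm8 acceptance are assumptions about this package's operand helpers):
//
//    p.VPSRLD(7, XMM1, XMM2) // logically shift each dword of xmm1 right by 7 into xmm2
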
// VPSRLDQ performs "Shift Packed Double Quadword Right Logical".
//
// Mnemonic : VPSRLDQ
// Supported forms : (8 forms)
//
// * VPSRLDQ imm8, xmm, xmm [AVX]
// * VPSRLDQ imm8, ymm, ymm [AVX2]
// * VPSRLDQ imm8, zmm, zmm [AVX512BW]
// * VPSRLDQ imm8, m512, zmm [AVX512BW]
// * VPSRLDQ imm8, xmm, xmm [AVX512BW,AVX512VL]
// * VPSRLDQ imm8, m128, xmm [AVX512BW,AVX512VL]
// * VPSRLDQ imm8, ymm, ymm [AVX512BW,AVX512VL]
// * VPSRLDQ imm8, m256, ymm [AVX512BW,AVX512VL]
//
func (self *Program) VPSRLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLDQ", 3, Operands { v0, v1, v2 })
    // VPSRLDQ imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, zmm, zmm
    if isImm8(v0) && isZMM(v1) && isZMM(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40)
            m.emit(0x73)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, m512, zmm
    if isImm8(v0) && isM512(v1) && isZMM(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x73)
            m.mrsd(3, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, xmm, xmm
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
            m.emit(0x73)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x73)
            m.mrsd(3, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, ymm, ymm
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20)
            m.emit(0x73)
            m.emit(0xd8 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLDQ imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
            m.emit(0x73)
            m.mrsd(3, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLDQ")
    }
    return p
}
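
// Usage sketch (illustrative only; register constants assumed): unlike the
// per-lane shifts, VPSRLDQ shifts whole 128-bit lanes right by bytes, e.g.
//
//    p.VPSRLDQ(4, XMM1, XMM2) // xmm2 = xmm1 shifted right by 4 bytes
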
// VPSRLQ performs "Shift Packed Quadword Data Right Logical".
//
// Mnemonic : VPSRLQ
// Supported forms : (18 forms)
//
// * VPSRLQ imm8, xmm, xmm [AVX]
// * VPSRLQ xmm, xmm, xmm [AVX]
// * VPSRLQ m128, xmm, xmm [AVX]
// * VPSRLQ imm8, ymm, ymm [AVX2]
// * VPSRLQ xmm, ymm, ymm [AVX2]
// * VPSRLQ m128, ymm, ymm [AVX2]
// * VPSRLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VPSRLQ imm8, zmm, zmm{k}{z} [AVX512F]
// * VPSRLQ xmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRLQ m128, zmm, zmm{k}{z} [AVX512F]
// * VPSRLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLQ", 3, Operands { v0, v1, v2 })
    // VPSRLQ imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd3)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLQ imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLQ m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd3)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLQ imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x73)
            m.mrsd(2, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLQ m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLQ imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x73)
            m.mrsd(2, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x73)
            m.mrsd(2, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLQ m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLQ imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x73)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLQ xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd3)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLQ m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd3)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLQ")
    }
    return p
}
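
// Usage sketch (illustrative only; register constants assumed):
//
//    p.VPSRLQ(1, XMM1, XMM2) // logically shift each qword of xmm1 right by 1 into xmm2
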
// VPSRLVD performs "Variable Shift Packed Doubleword Data Right Logical".
//
// Mnemonic : VPSRLVD
// Supported forms : (10 forms)
//
// * VPSRLVD xmm, xmm, xmm [AVX2]
// * VPSRLVD m128, xmm, xmm [AVX2]
// * VPSRLVD ymm, ymm, ymm [AVX2]
// * VPSRLVD m256, ymm, ymm [AVX2]
// * VPSRLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSRLVD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLVD", 3, Operands { v0, v1, v2 })
    // VPSRLVD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79 ^ (hlcode(v[1]) << 3))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLVD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLVD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSRLVD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLVD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSRLVD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLVD")
    }
    return p
}
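
// Usage sketch (illustrative only; register constants assumed): here each
// dword lane takes its own shift count from the first operand, e.g.
//
//    p.VPSRLVD(XMM0, XMM1, XMM2) // xmm2[i] = xmm1[i] >> xmm0[i], per dword
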
// VPSRLVQ performs "Variable Shift Packed Quadword Data Right Logical".
//
// Mnemonic : VPSRLVQ
// Supported forms : (10 forms)
//
// * VPSRLVQ xmm, xmm, xmm [AVX2]
// * VPSRLVQ m128, xmm, xmm [AVX2]
// * VPSRLVQ ymm, ymm, ymm [AVX2]
// * VPSRLVQ m256, ymm, ymm [AVX2]
// * VPSRLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSRLVQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSRLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSRLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSRLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSRLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLVQ", 3, Operands { v0, v1, v2 })
    // VPSRLVQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xf9 ^ (hlcode(v[1]) << 3))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLVQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLVQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSRLVQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLVQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x45)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSRLVQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x45)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLVQ")
    }
    return p
}
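
// Usage sketch (illustrative only; YMM register constants assumed):
//
//    p.VPSRLVQ(YMM0, YMM1, YMM2) // per-qword variable right shift, AVX2 form
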
// VPSRLVW performs "Variable Shift Packed Word Data Right Logical".
//
// Mnemonic : VPSRLVW
// Supported forms : (6 forms)
//
// * VPSRLVW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSRLVW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSRLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSRLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLVW", 3, Operands { v0, v1, v2 })
    // VPSRLVW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSRLVW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLVW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x10)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLVW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x10)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLVW")
    }
    return p
}
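
// Usage sketch (illustrative only; AVX512BW-only instruction, ZMM register
// constants and bare-register acceptance by isZMMkz are assumptions):
//
//    p.VPSRLVW(ZMM0, ZMM1, ZMM2) // per-word variable right shift
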
// VPSRLW performs "Shift Packed Word Data Right Logical".
//
// Mnemonic : VPSRLW
// Supported forms : (18 forms)
//
// * VPSRLW imm8, xmm, xmm [AVX]
// * VPSRLW xmm, xmm, xmm [AVX]
// * VPSRLW m128, xmm, xmm [AVX]
// * VPSRLW imm8, ymm, ymm [AVX2]
// * VPSRLW xmm, ymm, ymm [AVX2]
// * VPSRLW m128, ymm, ymm [AVX2]
// * VPSRLW imm8, zmm, zmm{k}{z} [AVX512BW]
// * VPSRLW xmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSRLW m128, zmm, zmm{k}{z} [AVX512BW]
// * VPSRLW imm8, m512, zmm{k}{z} [AVX512BW]
// * VPSRLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSRLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSRLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSRLW", 3, Operands { v0, v1, v2 })
    // VPSRLW imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, 0, v[1], hlcode(v[2]))
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd1)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLW imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, 0, v[1], hlcode(v[2]))
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW xmm, ymm, ymm
    if isXMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLW m128, ymm, ymm
    if isM128(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd1)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSRLW imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW xmm, zmm, zmm{k}{z}
    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLW m128, zmm, zmm{k}{z}
    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLW imm8, m512, zmm{k}{z}
    if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(2, addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLW imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x71)
            m.emit(0xd0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW xmm, ymm, ymm{k}{z}
    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd1)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSRLW m128, ymm, ymm{k}{z}
    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd1)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSRLW imm8, m128, xmm{k}{z}
    if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(2, addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPSRLW imm8, m256, ymm{k}{z}
    if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x71)
            m.mrsd(2, addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSRLW")
    }
    return p
}
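
// Usage sketch (illustrative only; register constants assumed):
//
//    p.VPSRLW(5, YMM1, YMM2) // logically shift each word of ymm1 right by 5 into ymm2
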
// VPSUBB performs "Subtract Packed Byte Integers".
//
// Mnemonic : VPSUBB
// Supported forms : (10 forms)
//
// * VPSUBB xmm, xmm, xmm [AVX]
// * VPSUBB m128, xmm, xmm [AVX]
// * VPSUBB ymm, ymm, ymm [AVX2]
// * VPSUBB m256, ymm, ymm [AVX2]
// * VPSUBB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSUBB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBB", 3, Operands { v0, v1, v2 })
    // VPSUBB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBB")
    }
    return p
}
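
// Usage sketch (illustrative only; register constants assumed). Operand
// order follows the AT&T-style form comments above, so the first operand
// is the subtrahend:
//
//    p.VPSUBB(XMM0, XMM1, XMM2) // xmm2 = xmm1 - xmm0, per byte
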
// VPSUBD performs "Subtract Packed Doubleword Integers".
//
// Mnemonic : VPSUBD
// Supported forms : (10 forms)
//
// * VPSUBD xmm, xmm, xmm [AVX]
// * VPSUBD m128, xmm, xmm [AVX]
// * VPSUBD ymm, ymm, ymm [AVX2]
// * VPSUBD m256, ymm, ymm [AVX2]
// * VPSUBD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSUBD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSUBD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSUBD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSUBD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSUBD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBD", 3, Operands { v0, v1, v2 })
    // VPSUBD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfa)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfa)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfa)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfa)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfa)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSUBD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xfa)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBD")
    }
    return p
}
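
// Usage sketch (illustrative only; register constants assumed):
//
//    p.VPSUBD(XMM0, XMM1, XMM2) // xmm2 = xmm1 - xmm0, per dword
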
// VPSUBQ performs "Subtract Packed Quadword Integers".
//
// Mnemonic : VPSUBQ
// Supported forms : (10 forms)
//
// * VPSUBQ xmm, xmm, xmm [AVX]
// * VPSUBQ m128, xmm, xmm [AVX]
// * VPSUBQ ymm, ymm, ymm [AVX2]
// * VPSUBQ m256, ymm, ymm [AVX2]
// * VPSUBQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPSUBQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPSUBQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSUBQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPSUBQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPSUBQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPSUBQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBQ", 3, Operands { v0, v1, v2 })
    // VPSUBQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xfb)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfb)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfb)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xfb)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPSUBQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xfb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBQ")
    }
    return p
}
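
// Usage sketch (illustrative only; register constants assumed):
//
//    p.VPSUBQ(XMM0, XMM1, XMM2) // xmm2 = xmm1 - xmm0, per qword
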
// VPSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation".
//
// Mnemonic : VPSUBSB
// Supported forms : (10 forms)
//
// * VPSUBSB xmm, xmm, xmm [AVX]
// * VPSUBSB m128, xmm, xmm [AVX]
// * VPSUBSB ymm, ymm, ymm [AVX2]
// * VPSUBSB m256, ymm, ymm [AVX2]
// * VPSUBSB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBSB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSUBSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBSB", 3, Operands { v0, v1, v2 })
    // VPSUBSB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBSB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBSB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBSB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBSB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBSB")
    }
    return p
}

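// Hand-written sketch (assumes a *Program p; register names are this
// package's exported constants):
//
//    p.VPSUBSB(XMM1, XMM2, XMM0)   // xmm0 = sat_i8(xmm2 - xmm1), per byte
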
// VPSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation".
//
// Mnemonic : VPSUBSW
// Supported forms : (10 forms)
//
// * VPSUBSW xmm, xmm, xmm [AVX]
// * VPSUBSW m128, xmm, xmm [AVX]
// * VPSUBSW ymm, ymm, ymm [AVX2]
// * VPSUBSW m256, ymm, ymm [AVX2]
// * VPSUBSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBSW", 3, Operands { v0, v1, v2 })
    // VPSUBSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xe9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe9)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe9)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xe9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xe9)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBSW")
    }
    return p
}

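// Hand-written sketch (assumes a *Program p and this package's register
// constants):
//
//    p.VPSUBSW(YMM1, YMM2, YMM0)   // ymm0 = sat_i16(ymm2 - ymm1), per word
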
// VPSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation".
//
// Mnemonic : VPSUBUSB
// Supported forms : (10 forms)
//
// * VPSUBUSB xmm, xmm, xmm [AVX]
// * VPSUBUSB m128, xmm, xmm [AVX]
// * VPSUBUSB ymm, ymm, ymm [AVX2]
// * VPSUBUSB m256, ymm, ymm [AVX2]
// * VPSUBUSB zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBUSB m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSUBUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBUSB", 3, Operands { v0, v1, v2 })
    // VPSUBUSB xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSB m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBUSB ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSB m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd8)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBUSB zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSB m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd8)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBUSB xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSB m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd8)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBUSB ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd8)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSB m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd8)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBUSB")
    }
    return p
}

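// Hand-written sketch (assumes a *Program p): unsigned saturation clamps
// results at zero rather than wrapping.
//
//    p.VPSUBUSB(XMM1, XMM2, XMM0)   // xmm0 = max(xmm2 - xmm1, 0), per unsigned byte
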
// VPSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation".
//
// Mnemonic : VPSUBUSW
// Supported forms : (10 forms)
//
// * VPSUBUSW xmm, xmm, xmm [AVX]
// * VPSUBUSW m128, xmm, xmm [AVX]
// * VPSUBUSW ymm, ymm, ymm [AVX2]
// * VPSUBUSW m256, ymm, ymm [AVX2]
// * VPSUBUSW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBUSW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSUBUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBUSW", 3, Operands { v0, v1, v2 })
    // VPSUBUSW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBUSW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xd9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBUSW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd9)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBUSW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd9)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBUSW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xd9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBUSW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xd9)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBUSW")
    }
    return p
}

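// Hand-written sketch (assumes a *Program p):
//
//    p.VPSUBUSW(XMM1, XMM2, XMM0)   // xmm0 = max(xmm2 - xmm1, 0), per unsigned word
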
// VPSUBW performs "Subtract Packed Word Integers".
//
// Mnemonic : VPSUBW
// Supported forms : (10 forms)
//
// * VPSUBW xmm, xmm, xmm [AVX]
// * VPSUBW m128, xmm, xmm [AVX]
// * VPSUBW ymm, ymm, ymm [AVX2]
// * VPSUBW m256, ymm, ymm [AVX2]
// * VPSUBW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPSUBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPSUBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPSUBW", 3, Operands { v0, v1, v2 })
    // VPSUBW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xf9)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPSUBW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf9)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPSUBW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf9)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPSUBW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xf9)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPSUBW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xf9)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPSUBW")
    }
    return p
}

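// Hand-written sketch (assumes a *Program p): unlike the saturating variants
// above, VPSUBW wraps around on overflow.
//
//    p.VPSUBW(XMM1, XMM2, XMM0)   // xmm0 = (xmm2 - xmm1) mod 2^16, per word
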
// VPTERNLOGD performs "Bitwise Ternary Logical Operation on Doubleword Values".
//
// Mnemonic : VPTERNLOGD
// Supported forms : (6 forms)
//
// * VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPTERNLOGD imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPTERNLOGD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPTERNLOGD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPTERNLOGD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPTERNLOGD", 4, Operands { v0, v1, v2, v3 })
    // VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x25)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGD imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x25)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGD imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x25)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGD imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTERNLOGD")
    }
    return p
}

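// Hand-written sketch (assumes a *Program p): the imm8 is an 8-entry truth
// table indexed by the bit triple (dest, second source, first source), so
// 0xCA implements a bitwise select with dest as the selector.
//
//    p.VPTERNLOGD(0xCA, ZMM1, ZMM2, ZMM0)   // zmm0 = (zmm0 & zmm2) | (^zmm0 & zmm1)
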
// VPTERNLOGQ performs "Bitwise Ternary Logical Operation on Quadword Values".
//
// Mnemonic : VPTERNLOGQ
// Supported forms : (6 forms)
//
// * VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPTERNLOGQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VPTERNLOGQ", 4, Operands { v0, v1, v2, v3 })
    // VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x25)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x25)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x25)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x25)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTERNLOGQ")
    }
    return p
}

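// Hand-written sketch (assumes a *Program p): same truth-table scheme as
// VPTERNLOGD, with 64-bit masking/broadcast granularity; 0x96 is a 3-way XOR.
//
//    p.VPTERNLOGQ(0x96, ZMM1, ZMM2, ZMM0)   // zmm0 = zmm0 ^ zmm2 ^ zmm1
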
// VPTEST performs "Packed Logical Compare".
//
// Mnemonic : VPTEST
// Supported forms : (4 forms)
//
// * VPTEST xmm, xmm [AVX]
// * VPTEST m128, xmm [AVX]
// * VPTEST ymm, ymm [AVX]
// * VPTEST m256, ymm [AVX]
//
func (self *Program) VPTEST(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VPTEST", 2, Operands { v0, v1 })
    // VPTEST xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x17)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPTEST m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x17)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VPTEST ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x17)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VPTEST m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x17)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTEST")
    }
    return p
}

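// Hand-written sketch (assumes a *Program p): VPTEST writes no register,
// only RFLAGS, so it is typically followed by a conditional jump.
//
//    p.VPTEST(XMM1, XMM0)   // ZF = ((xmm1 & xmm0) == 0), CF = ((xmm1 &^ xmm0) == 0)
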
// VPTESTMB performs "Logical AND of Packed Byte Integer Values and Set Mask".
//
// Mnemonic : VPTESTMB
// Supported forms : (6 forms)
//
// * VPTESTMB zmm, zmm, k{k} [AVX512BW]
// * VPTESTMB m512, zmm, k{k} [AVX512BW]
// * VPTESTMB xmm, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTMB m128, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTMB ymm, ymm, k{k} [AVX512BW,AVX512VL]
// * VPTESTMB m256, ymm, k{k} [AVX512BW,AVX512VL]
//
func (self *Program) VPTESTMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTMB", 3, Operands { v0, v1, v2 })
    // VPTESTMB zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMB m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTMB xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMB m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTMB ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMB m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTMB")
    }
    return p
}

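// Hand-written sketch (assumes a *Program p and the K-register constants
// exported by this package):
//
//    p.VPTESTMB(ZMM1, ZMM2, K1)   // k1[i] = ((zmm2.byte[i] & zmm1.byte[i]) != 0)
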
// VPTESTMD performs "Logical AND of Packed Doubleword Integer Values and Set Mask".
//
// Mnemonic : VPTESTMD
// Supported forms : (6 forms)
//
// * VPTESTMD m512/m32bcst, zmm, k{k} [AVX512F]
// * VPTESTMD zmm, zmm, k{k} [AVX512F]
// * VPTESTMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTMD xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPTESTMD ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPTESTMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTMD", 3, Operands { v0, v1, v2 })
    // VPTESTMD m512/m32bcst, zmm, k{k}
    if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTMD zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMD m128/m32bcst, xmm, k{k}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTMD xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMD m256/m32bcst, ymm, k{k}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPTESTMD ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTMD")
    }
    return p
}

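// Hand-written sketch (assumes a *Program p and the K-register constants):
//
//    p.VPTESTMD(XMM1, XMM2, K1)   // k1[i] = ((xmm2.dword[i] & xmm1.dword[i]) != 0)
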
// VPTESTMQ performs "Logical AND of Packed Quadword Integer Values and Set Mask".
//
// Mnemonic : VPTESTMQ
// Supported forms : (6 forms)
//
// * VPTESTMQ m512/m64bcst, zmm, k{k} [AVX512F]
// * VPTESTMQ zmm, zmm, k{k} [AVX512F]
// * VPTESTMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTMQ xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPTESTMQ ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPTESTMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTMQ", 3, Operands { v0, v1, v2 })
    // VPTESTMQ m512/m64bcst, zmm, k{k}
    if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTMQ zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMQ m128/m64bcst, xmm, k{k}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTMQ xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMQ m256/m64bcst, ymm, k{k}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPTESTMQ ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTMQ")
    }
    return p
}

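// Hand-written sketch (assumes a *Program p and the K-register constants):
//
//    p.VPTESTMQ(ZMM1, ZMM2, K1)   // k1[i] = ((zmm2.qword[i] & zmm1.qword[i]) != 0)
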
// VPTESTMW performs "Logical AND of Packed Word Integer Values and Set Mask".
//
// Mnemonic : VPTESTMW
// Supported forms : (6 forms)
//
// * VPTESTMW zmm, zmm, k{k} [AVX512BW]
// * VPTESTMW m512, zmm, k{k} [AVX512BW]
// * VPTESTMW xmm, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTMW m128, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTMW ymm, ymm, k{k} [AVX512BW,AVX512VL]
// * VPTESTMW m256, ymm, k{k} [AVX512BW,AVX512VL]
//
func (self *Program) VPTESTMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTMW", 3, Operands { v0, v1, v2 })
    // VPTESTMW zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMW m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTMW xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMW m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTMW ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTMW m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTMW")
    }
    return p
}

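// Hand-written sketch (assumes a *Program p and the K-register constants):
//
//    p.VPTESTMW(YMM1, YMM2, K1)   // k1[i] = ((ymm2.word[i] & ymm1.word[i]) != 0)
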
// VPTESTNMB performs "Logical NAND of Packed Byte Integer Values and Set Mask".
//
// Mnemonic : VPTESTNMB
// Supported forms : (6 forms)
//
// * VPTESTNMB zmm, zmm, k{k} [AVX512BW,AVX512F]
// * VPTESTNMB m512, zmm, k{k} [AVX512BW,AVX512F]
// * VPTESTNMB xmm, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTNMB m128, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTNMB ymm, ymm, k{k} [AVX512BW,AVX512VL]
// * VPTESTNMB m256, ymm, k{k} [AVX512BW,AVX512VL]
//
func (self *Program) VPTESTNMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTNMB", 3, Operands { v0, v1, v2 })
    // VPTESTNMB zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMB m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTNMB xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMB m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTNMB ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMB m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTNMB")
    }
    return p
}

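// Hand-written sketch (assumes a *Program p and the K-register constants):
// the NM variants set the mask bit when the AND is zero.
//
//    p.VPTESTNMB(ZMM1, ZMM2, K1)   // k1[i] = ((zmm2.byte[i] & zmm1.byte[i]) == 0)
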
// VPTESTNMD performs "Logical NAND of Packed Doubleword Integer Values and Set Mask".
//
// Mnemonic : VPTESTNMD
// Supported forms : (6 forms)
//
// * VPTESTNMD m512/m32bcst, zmm, k{k} [AVX512F]
// * VPTESTNMD zmm, zmm, k{k} [AVX512F]
// * VPTESTNMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTNMD xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTNMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPTESTNMD ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPTESTNMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTNMD", 3, Operands { v0, v1, v2 })
    // VPTESTNMD m512/m32bcst, zmm, k{k}
    if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTNMD zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMD m128/m32bcst, xmm, k{k}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTNMD xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMD m256/m32bcst, ymm, k{k}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPTESTNMD ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTNMD")
    }
    return p
}

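// Hand-written sketch (assumes a *Program p and the K-register constants):
//
//    p.VPTESTNMD(XMM1, XMM2, K1)   // k1[i] = ((xmm2.dword[i] & xmm1.dword[i]) == 0)
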
// VPTESTNMQ performs "Logical NAND of Packed Quadword Integer Values and Set Mask".
//
// Mnemonic : VPTESTNMQ
// Supported forms : (6 forms)
//
// * VPTESTNMQ m512/m64bcst, zmm, k{k} [AVX512F]
// * VPTESTNMQ zmm, zmm, k{k} [AVX512F]
// * VPTESTNMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTNMQ xmm, xmm, k{k} [AVX512F,AVX512VL]
// * VPTESTNMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
// * VPTESTNMQ ymm, ymm, k{k} [AVX512F,AVX512VL]
//
func (self *Program) VPTESTNMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTNMQ", 3, Operands { v0, v1, v2 })
    // VPTESTNMQ m512/m64bcst, zmm, k{k}
    if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTNMQ zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMQ m128/m64bcst, xmm, k{k}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTNMQ xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMQ m256/m64bcst, ymm, k{k}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
            m.emit(0x27)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPTESTNMQ ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x27)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTNMQ")
    }
    return p
}

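// Hand-written sketch (assumes a *Program p and the K-register constants):
//
//    p.VPTESTNMQ(ZMM1, ZMM2, K1)   // k1[i] = ((zmm2.qword[i] & zmm1.qword[i]) == 0)
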
// VPTESTNMW performs "Logical NAND of Packed Word Integer Values and Set Mask".
//
// Mnemonic : VPTESTNMW
// Supported forms : (6 forms)
//
// * VPTESTNMW zmm, zmm, k{k} [AVX512BW,AVX512F]
// * VPTESTNMW m512, zmm, k{k} [AVX512BW,AVX512F]
// * VPTESTNMW xmm, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTNMW m128, xmm, k{k} [AVX512BW,AVX512VL]
// * VPTESTNMW ymm, ymm, k{k} [AVX512BW,AVX512VL]
// * VPTESTNMW m256, ymm, k{k} [AVX512BW,AVX512VL]
//
func (self *Program) VPTESTNMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPTESTNMW", 3, Operands { v0, v1, v2 })
    // VPTESTNMW zmm, zmm, k{k}
    if isZMM(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMW m512, zmm, k{k}
    if isM512(v0) && isZMM(v1) && isKk(v2) {
        self.require(ISA_AVX512F | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPTESTNMW xmm, xmm, k{k}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMW m128, xmm, k{k}
    if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPTESTNMW ymm, ymm, k{k}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfe ^ (hlcode(v[1]) << 3))
            m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x26)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPTESTNMW m256, ymm, k{k}
    if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
            m.emit(0x26)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPTESTNMW")
    }
    return p
}

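// Illustrative sketch (assumed names prog, YMM0, YMM1, K2): unlike the
// vector-result encoders below, VPTESTNMW produces a mask, so its last
// operand is a k register rather than a third vector register:
//
//     prog.VPTESTNMW(YMM0, YMM1, K2) // K2[i] = ((YMM0 AND YMM1) word i == 0)
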
// VPUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words".
//
// Mnemonic : VPUNPCKHBW
// Supported forms : (10 forms)
//
// * VPUNPCKHBW xmm, xmm, xmm [AVX]
// * VPUNPCKHBW m128, xmm, xmm [AVX]
// * VPUNPCKHBW ymm, ymm, ymm [AVX2]
// * VPUNPCKHBW m256, ymm, ymm [AVX2]
// * VPUNPCKHBW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKHBW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKHBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKHBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKHBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKHBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPUNPCKHBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKHBW", 3, Operands { v0, v1, v2 })
    // VPUNPCKHBW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHBW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x68)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHBW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHBW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x68)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHBW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHBW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x68)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKHBW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHBW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x68)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKHBW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x68)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHBW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x68)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKHBW")
    }
    return p
}

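// Illustrative sketch (assumed names prog, XMM0-XMM2): the guards above
// route plain xmm/ymm operands (isXMM/isYMM) to the two-byte VEX encoder
// m.vex2, while EVEX-only operands (isEVEXXMM/isXMMkz, e.g. a masked
// destination or a high register) take the 0x62-prefixed AVX-512 path:
//
//     prog.VPUNPCKHBW(XMM0, XMM1, XMM2) // VEX.128 form, requires only AVX
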
// VPUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords".
//
// Mnemonic : VPUNPCKHDQ
// Supported forms : (10 forms)
//
// * VPUNPCKHDQ xmm, xmm, xmm [AVX]
// * VPUNPCKHDQ m128, xmm, xmm [AVX]
// * VPUNPCKHDQ ymm, ymm, ymm [AVX2]
// * VPUNPCKHDQ m256, ymm, ymm [AVX2]
// * VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKHDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKHDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKHDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPUNPCKHDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKHDQ", 3, Operands { v0, v1, v2 })
    // VPUNPCKHDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6a)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6a)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKHDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6a)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKHDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6a)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPUNPCKHDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x6a)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKHDQ")
    }
    return p
}

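// Note (editorial, not generated): in the m32bcst forms above, the last
// argument to m.evex is bcode(v[0]), which sets the EVEX broadcast bit
// when the memory operand was constructed as a broadcast. Byte/word
// encoders such as VPUNPCKHWD below pass a literal 0 there instead,
// because their memory forms have no broadcast variant.
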
// VPUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords".
//
// Mnemonic : VPUNPCKHQDQ
// Supported forms : (10 forms)
//
// * VPUNPCKHQDQ xmm, xmm, xmm [AVX]
// * VPUNPCKHQDQ m128, xmm, xmm [AVX]
// * VPUNPCKHQDQ ymm, ymm, ymm [AVX2]
// * VPUNPCKHQDQ m256, ymm, ymm [AVX2]
// * VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKHQDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKHQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKHQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPUNPCKHQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKHQDQ", 3, Operands { v0, v1, v2 })
    // VPUNPCKHQDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHQDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHQDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHQDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6d)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6d)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKHQDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6d)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKHQDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6d)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPUNPCKHQDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x6d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKHQDQ")
    }
    return p
}

// VPUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords".
//
// Mnemonic : VPUNPCKHWD
// Supported forms : (10 forms)
//
// * VPUNPCKHWD xmm, xmm, xmm [AVX]
// * VPUNPCKHWD m128, xmm, xmm [AVX]
// * VPUNPCKHWD ymm, ymm, ymm [AVX2]
// * VPUNPCKHWD m256, ymm, ymm [AVX2]
// * VPUNPCKHWD zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKHWD m512, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKHWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKHWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKHWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKHWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPUNPCKHWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKHWD", 3, Operands { v0, v1, v2 })
    // VPUNPCKHWD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHWD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHWD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHWD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKHWD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHWD m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKHWD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHWD m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKHWD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x69)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKHWD m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x69)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKHWD")
    }
    return p
}

// VPUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words".
//
// Mnemonic : VPUNPCKLBW
// Supported forms : (10 forms)
//
// * VPUNPCKLBW xmm, xmm, xmm [AVX]
// * VPUNPCKLBW m128, xmm, xmm [AVX]
// * VPUNPCKLBW ymm, ymm, ymm [AVX2]
// * VPUNPCKLBW m256, ymm, ymm [AVX2]
// * VPUNPCKLBW zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKLBW m512, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKLBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKLBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKLBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKLBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPUNPCKLBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKLBW", 3, Operands { v0, v1, v2 })
    // VPUNPCKLBW xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLBW m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLBW ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLBW m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLBW zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLBW m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKLBW xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLBW m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKLBW ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x60)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLBW m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x60)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKLBW")
    }
    return p
}

// VPUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords".
//
// Mnemonic : VPUNPCKLDQ
// Supported forms : (10 forms)
//
// * VPUNPCKLDQ xmm, xmm, xmm [AVX]
// * VPUNPCKLDQ m128, xmm, xmm [AVX]
// * VPUNPCKLDQ ymm, ymm, ymm [AVX2]
// * VPUNPCKLDQ m256, ymm, ymm [AVX2]
// * VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKLDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKLDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKLDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPUNPCKLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKLDQ", 3, Operands { v0, v1, v2 })
    // VPUNPCKLDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKLDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKLDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x62)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPUNPCKLDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x62)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKLDQ")
    }
    return p
}

// VPUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords".
//
// Mnemonic : VPUNPCKLQDQ
// Supported forms : (10 forms)
//
// * VPUNPCKLQDQ xmm, xmm, xmm [AVX]
// * VPUNPCKLQDQ m128, xmm, xmm [AVX]
// * VPUNPCKLQDQ ymm, ymm, ymm [AVX2]
// * VPUNPCKLQDQ m256, ymm, ymm [AVX2]
// * VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKLQDQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKLQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPUNPCKLQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPUNPCKLQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKLQDQ", 3, Operands { v0, v1, v2 })
    // VPUNPCKLQDQ xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLQDQ m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLQDQ ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLQDQ m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x6c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKLQDQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKLQDQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x6c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPUNPCKLQDQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x6c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKLQDQ")
    }
    return p
}

// VPUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords".
//
// Mnemonic : VPUNPCKLWD
// Supported forms : (10 forms)
//
// * VPUNPCKLWD xmm, xmm, xmm [AVX]
// * VPUNPCKLWD m128, xmm, xmm [AVX]
// * VPUNPCKLWD ymm, ymm, ymm [AVX2]
// * VPUNPCKLWD m256, ymm, ymm [AVX2]
// * VPUNPCKLWD zmm, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKLWD m512, zmm, zmm{k}{z} [AVX512BW]
// * VPUNPCKLWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKLWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKLWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
// * VPUNPCKLWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
//
func (self *Program) VPUNPCKLWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPUNPCKLWD", 3, Operands { v0, v1, v2 })
    // VPUNPCKLWD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLWD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLWD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLWD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPUNPCKLWD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLWD m512, zmm, zmm{k}{z}
    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPUNPCKLWD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLWD m128, xmm, xmm{k}{z}
    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPUNPCKLWD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x61)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPUNPCKLWD m256, ymm, ymm{k}{z}
    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512BW)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x61)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPUNPCKLWD")
    }
    return p
}

// VPXOR performs "Packed Bitwise Logical Exclusive OR".
//
// Mnemonic : VPXOR
// Supported forms : (4 forms)
//
// * VPXOR xmm, xmm, xmm [AVX]
// * VPXOR m128, xmm, xmm [AVX]
// * VPXOR ymm, ymm, ymm [AVX2]
// * VPXOR m256, ymm, ymm [AVX2]
//
func (self *Program) VPXOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPXOR", 3, Operands { v0, v1, v2 })
    // VPXOR xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPXOR m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VPXOR ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPXOR m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX2)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPXOR")
    }
    return p
}

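// Illustrative sketch (assumed names prog and the register constants):
// VPXOR only has VEX forms, so it stops at 256 bits and takes no mask;
// AVX-512 callers pick the element-typed VPXORD/VPXORQ encoders below to
// gain zmm width, masking and broadcast support:
//
//     prog.VPXOR(YMM1, YMM2, YMM3)  // AVX2, VEX.256
//     prog.VPXORD(ZMM1, ZMM2, ZMM3) // AVX512F, EVEX.512
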
// VPXORD performs "Bitwise Logical Exclusive OR of Packed Doubleword Integers".
//
// Mnemonic : VPXORD
// Supported forms : (6 forms)
//
// * VPXORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VPXORD zmm, zmm, zmm{k}{z} [AVX512F]
// * VPXORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPXORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPXORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPXORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPXORD", 3, Operands { v0, v1, v2 })
    // VPXORD m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPXORD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPXORD m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPXORD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPXORD m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPXORD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPXORD")
    }
    return p
}

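// Note (editorial, not generated): in the VPXORD forms above, kcode and
// zcode of the destination select merge- or zero-masking, and an unmasked
// destination register encodes as mask k0, i.e. no masking; masked operand
// values are built by helpers elsewhere in this package, which this
// generated file only consumes. Assumed names as before:
//
//     prog.VPXORQ(ZMM4, ZMM5, ZMM6) // unmasked: kcode(v[2]) == 0 selects k0
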
// VPXORQ performs "Bitwise Logical Exclusive OR of Packed Quadword Integers".
//
// Mnemonic : VPXORQ
// Supported forms : (6 forms)
//
// * VPXORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VPXORQ zmm, zmm, zmm{k}{z} [AVX512F]
// * VPXORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPXORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VPXORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VPXORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VPXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VPXORQ", 3, Operands { v0, v1, v2 })
    // VPXORQ m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VPXORQ zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPXORQ m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VPXORQ xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VPXORQ m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0xef)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VPXORQ ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0xef)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VPXORQ")
    }
    return p
}

// VRANGEPD performs "Range Restriction Calculation For Packed Pairs of Double-Precision Floating-Point Values".
//
// Mnemonic : VRANGEPD
// Supported forms : (7 forms)
//
// * VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VRANGEPD imm8, zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VRANGEPD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VRANGEPD imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VRANGEPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRANGEPD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VRANGEPD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VRANGEPD takes 4 or 5 operands")
    }
    // VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x50)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPD imm8, zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x50)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPD imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x50)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPD imm8, ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRANGEPD")
    }
    return p
}

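// Illustrative sketch (assumed names prog, ZMM1-ZMM3 and an SAE operand
// constant, here written SAE): the variadic tail exists because the
// suppress-all-exceptions form carries one extra operand, so VRANGEPD is
// called with either 4 or 5 arguments and dispatches on len(vv):
//
//     prog.VRANGEPD(2, ZMM1, ZMM2, ZMM3)      // imm8, zmm, zmm, zmm{k}{z}
//     prog.VRANGEPD(2, SAE, ZMM1, ZMM2, ZMM3) // imm8, {sae}, zmm, zmm, zmm{k}{z}
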
// VRANGEPS performs "Range Restriction Calculation For Packed Pairs of Single-Precision Floating-Point Values".
//
// Mnemonic : VRANGEPS
// Supported forms : (7 forms)
//
// * VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VRANGEPS imm8, zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VRANGEPS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VRANGEPS imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VRANGEPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRANGEPS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VRANGEPS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VRANGEPS takes 4 or 5 operands")
    }
    // VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x50)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPS imm8, zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x50)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPS imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x50)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGEPS imm8, ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x50)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRANGEPS")
    }
    return p
}

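// Operand-order note: as the "Supported forms" comments document, every
// generated method in this file takes its operands source-first and
// destination-last (with any immediate in front), so the Go argument order
// follows the form strings rather than Intel's destination-first syntax.
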
// VRANGESD performs "Range Restriction Calculation For a pair of Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VRANGESD
// Supported forms : (3 forms)
//
// * VRANGESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ]
// * VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ]
// * VRANGESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
//
func (self *Program) VRANGESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRANGESD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VRANGESD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VRANGESD takes 4 or 5 operands")
    }
    // VRANGESD imm8, m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[3]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGESD imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRANGESD")
    }
    return p
}

// VRANGESS performs "Range Restriction Calculation For a pair of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VRANGESS
// Supported forms : (3 forms)
//
// * VRANGESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ]
// * VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ]
// * VRANGESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
//
func (self *Program) VRANGESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRANGESS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VRANGESS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VRANGESS takes 4 or 5 operands")
    }
    // VRANGESS imm8, m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRANGESS imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRANGESS")
    }
    return p
}

// VRCP14PD performs "Compute Approximate Reciprocals of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VRCP14PD
// Supported forms : (6 forms)
//
// * VRCP14PD m512/m64bcst, zmm{k}{z} [AVX512F]
// * VRCP14PD zmm, zmm{k}{z} [AVX512F]
// * VRCP14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VRCP14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VRCP14PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VRCP14PD ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VRCP14PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VRCP14PD", 2, Operands { v0, v1 })
    // VRCP14PD m512/m64bcst, zmm{k}{z}
    if isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRCP14PD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRCP14PD m128/m64bcst, xmm{k}{z}
    if isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VRCP14PD m256/m64bcst, ymm{k}{z}
    if isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VRCP14PD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRCP14PD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP14PD")
    }
    return p
}

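// The constants OR-ed into the final EVEX prefix byte of the register forms
// above select the vector length and rounding behaviour: 0x48 encodes a
// 512-bit (zmm) operation, 0x28 a 256-bit (ymm) one, and 0x08 a 128-bit
// (xmm) one, while 0x18 (or an extra | 0x10) sets the EVEX.b bit that
// requests suppress-all-exceptions {sae} semantics.
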
// VRCP14PS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VRCP14PS
// Supported forms : (6 forms)
//
// * VRCP14PS m512/m32bcst, zmm{k}{z} [AVX512F]
// * VRCP14PS zmm, zmm{k}{z} [AVX512F]
// * VRCP14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VRCP14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VRCP14PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VRCP14PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VRCP14PS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VRCP14PS", 2, Operands { v0, v1 })
    // VRCP14PS m512/m32bcst, zmm{k}{z}
    if isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRCP14PS zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRCP14PS m128/m32bcst, xmm{k}{z}
    if isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VRCP14PS m256/m32bcst, ymm{k}{z}
    if isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4c)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VRCP14PS xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRCP14PS ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x4c)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP14PS")
    }
    return p
}

// VRCP14SD performs "Compute Approximate Reciprocal of a Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VRCP14SD
// Supported forms : (2 forms)
//
// * VRCP14SD xmm, xmm, xmm{k}{z} [AVX512F]
// * VRCP14SD m64, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VRCP14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VRCP14SD", 3, Operands { v0, v1, v2 })
    // VRCP14SD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VRCP14SD m64, xmm, xmm{k}{z}
    if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x4d)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP14SD")
    }
    return p
}

// VRCP14SS performs "Compute Approximate Reciprocal of a Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VRCP14SS
// Supported forms : (2 forms)
//
// * VRCP14SS xmm, xmm, xmm{k}{z} [AVX512F]
// * VRCP14SS m32, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VRCP14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VRCP14SS", 3, Operands { v0, v1, v2 })
    // VRCP14SS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x4d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VRCP14SS m32, xmm, xmm{k}{z}
    if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x4d)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP14SS")
    }
    return p
}

// VRCP28PD performs "Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRCP28PD
// Supported forms : (3 forms)
//
// * VRCP28PD m512/m64bcst, zmm{k}{z} [AVX512ER]
// * VRCP28PD {sae}, zmm, zmm{k}{z} [AVX512ER]
// * VRCP28PD zmm, zmm{k}{z} [AVX512ER]
//
func (self *Program) VRCP28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRCP28PD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VRCP28PD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VRCP28PD takes 2 or 3 operands")
    }
    // VRCP28PD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xca)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRCP28PD {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xca)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VRCP28PD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xca)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP28PD")
    }
    return p
}

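// The VRCP28PD encoder above and the VRCP28PS/VRCP28SD/VRCP28SS encoders
// that follow are gated on ISA_AVX512ER; AVX-512ER only ever shipped on the
// Xeon Phi (Knights Landing / Knights Mill) line, so code targeting
// mainstream x86-64 parts normally uses the 2^-14-precision VRCP14*
// encoders above instead.
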
// VRCP28PS performs "Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRCP28PS
// Supported forms : (3 forms)
//
// * VRCP28PS m512/m32bcst, zmm{k}{z} [AVX512ER]
// * VRCP28PS {sae}, zmm, zmm{k}{z} [AVX512ER]
// * VRCP28PS zmm, zmm{k}{z} [AVX512ER]
//
func (self *Program) VRCP28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRCP28PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VRCP28PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VRCP28PS takes 2 or 3 operands")
    }
    // VRCP28PS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xca)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRCP28PS {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xca)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VRCP28PS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xca)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP28PS")
    }
    return p
}

// VRCP28SD performs "Approximation to the Reciprocal of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRCP28SD
// Supported forms : (3 forms)
//
// * VRCP28SD m64, xmm, xmm{k}{z} [AVX512ER]
// * VRCP28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
// * VRCP28SD xmm, xmm, xmm{k}{z} [AVX512ER]
//
func (self *Program) VRCP28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRCP28SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VRCP28SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VRCP28SD takes 3 or 4 operands")
    }
    // VRCP28SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xcb)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VRCP28SD {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xcb)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VRCP28SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xcb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP28SD")
    }
    return p
}

// VRCP28SS performs "Approximation to the Reciprocal of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRCP28SS
// Supported forms : (3 forms)
//
// * VRCP28SS m32, xmm, xmm{k}{z} [AVX512ER]
// * VRCP28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
// * VRCP28SS xmm, xmm, xmm{k}{z} [AVX512ER]
//
func (self *Program) VRCP28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRCP28SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VRCP28SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VRCP28SS takes 3 or 4 operands")
    }
    // VRCP28SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xcb)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VRCP28SS {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xcb)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VRCP28SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xcb)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCP28SS")
    }
    return p
}

// VRCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VRCPPS
// Supported forms : (4 forms)
//
// * VRCPPS xmm, xmm [AVX]
// * VRCPPS m128, xmm [AVX]
// * VRCPPS ymm, ymm [AVX]
// * VRCPPS m256, ymm [AVX]
//
func (self *Program) VRCPPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VRCPPS", 2, Operands { v0, v1 })
    // VRCPPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x53)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRCPPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x53)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VRCPPS ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x53)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRCPPS m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x53)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCPPS")
    }
    return p
}

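// For the legacy VEX forms above, m.vex2 emits the two-byte VEX prefix
// (0xC5 ..). Judging by the generated call sites, its first argument packs
// the prefix's L and pp fields: 0 selects a 128-bit operation with no SIMD
// prefix, 4 sets VEX.L for the 256-bit ymm forms, and 2 (used by VRCPSS
// below) selects the F3 prefix that distinguishes the scalar opcode.
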
// VRCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VRCPSS
// Supported forms : (2 forms)
//
// * VRCPSS xmm, xmm, xmm [AVX]
// * VRCPSS m32, xmm, xmm [AVX]
//
func (self *Program) VRCPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VRCPSS", 3, Operands { v0, v1, v2 })
    // VRCPSS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x53)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VRCPSS m32, xmm, xmm
    if isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x53)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRCPSS")
    }
    return p
}

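// rcpssSketch is a hedged usage illustration, not part of the generated
// encoder set: it assumes the XMM register constants (XMM0, XMM1) exported
// elsewhere in this package and a caller-supplied Program. Operands follow
// the source-first order of the form comments, so this encodes "approximate
// reciprocal of the low float32 of XMM1, upper lanes copied from XMM1, into
// XMM0"; an unsupported operand combination would panic with "invalid
// operands for VRCPSS".
func rcpssSketch(p *Program) *Instruction {
    return p.VRCPSS(XMM1, XMM1, XMM0)
}
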
// VREDUCEPD performs "Perform Reduction Transformation on Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VREDUCEPD
// Supported forms : (6 forms)
//
// * VREDUCEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512DQ]
// * VREDUCEPD imm8, zmm, zmm{k}{z} [AVX512DQ]
// * VREDUCEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VREDUCEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VREDUCEPD imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VREDUCEPD imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VREDUCEPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VREDUCEPD", 3, Operands { v0, v1, v2 })
    // VREDUCEPD imm8, m512/m64bcst, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPD imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPD imm8, m128/m64bcst, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPD imm8, m256/m64bcst, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPD imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPD imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VREDUCEPD")
    }
    return p
}

// VREDUCEPS performs "Perform Reduction Transformation on Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VREDUCEPS
// Supported forms : (6 forms)
//
// * VREDUCEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512DQ]
// * VREDUCEPS imm8, zmm, zmm{k}{z} [AVX512DQ]
// * VREDUCEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VREDUCEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VREDUCEPS imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VREDUCEPS imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VREDUCEPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VREDUCEPS", 3, Operands { v0, v1, v2 })
    // VREDUCEPS imm8, m512/m32bcst, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPS imm8, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPS imm8, m128/m32bcst, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPS imm8, m256/m32bcst, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x56)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPS imm8, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCEPS imm8, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x56)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VREDUCEPS")
    }
    return p
}

// VREDUCESD performs "Perform Reduction Transformation on a Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VREDUCESD
// Supported forms : (2 forms)
//
// * VREDUCESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
// * VREDUCESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ]
//
func (self *Program) VREDUCESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VREDUCESD", 4, Operands { v0, v1, v2, v3 })
    // VREDUCESD imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCESD imm8, m64, xmm, xmm{k}{z}
    if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x57)
            m.mrsd(lcode(v[3]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VREDUCESD")
    }
    return p
}

// VREDUCESS performs "Perform Reduction Transformation on a Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VREDUCESS
// Supported forms : (2 forms)
//
// * VREDUCESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
// * VREDUCESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ]
//
func (self *Program) VREDUCESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VREDUCESS", 4, Operands { v0, v1, v2, v3 })
    // VREDUCESS imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VREDUCESS imm8, m32, xmm, xmm{k}{z}
    if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x57)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VREDUCESS")
    }
    return p
}

// VRNDSCALEPD performs "Round Packed Double-Precision Floating-Point Values To Include A Given Number Of Fraction Bits".
//
// Mnemonic : VRNDSCALEPD
// Supported forms : (7 forms)
//
// * VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
// * VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
// * VRNDSCALEPD imm8, zmm, zmm{k}{z} [AVX512F]
// * VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VRNDSCALEPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VRNDSCALEPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VRNDSCALEPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRNDSCALEPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VRNDSCALEPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VRNDSCALEPD takes 3 or 4 operands")
    }
    // VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPD imm8, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPD imm8, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPD imm8, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRNDSCALEPD")
    }
    return p
}

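// For the VRNDSCALE* family the trailing imm8 is interpreted by the CPU as
// an M:RC pair: bits [7:4] give the number of fraction bits M to keep
// (rounding to a multiple of 2^-M), bit 3 suppresses the precision (inexact)
// exception, bit 2 selects MXCSR.RC over the immediate rounding mode, and
// bits [1:0] pick that immediate rounding mode.
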
// VRNDSCALEPS performs "Round Packed Single-Precision Floating-Point Values To Include A Given Number Of Fraction Bits".
//
// Mnemonic : VRNDSCALEPS
// Supported forms : (7 forms)
//
// * VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
// * VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
// * VRNDSCALEPS imm8, zmm, zmm{k}{z} [AVX512F]
// * VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VRNDSCALEPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VRNDSCALEPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VRNDSCALEPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRNDSCALEPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VRNDSCALEPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VRNDSCALEPS takes 3 or 4 operands")
    }
    // VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x08)
            m.mrsd(lcode(v[2]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPS imm8, zmm, zmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x08)
            m.mrsd(lcode(v[2]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
            m.emit(0x08)
            m.mrsd(lcode(v[2]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPS imm8, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALEPS imm8, ymm, ymm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
            m.emit(0x08)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRNDSCALEPS")
    }
    return p
}

// VRNDSCALESD performs "Round Scalar Double-Precision Floating-Point Value To Include A Given Number Of Fraction Bits".
//
// Mnemonic : VRNDSCALESD
// Supported forms : (3 forms)
//
// * VRNDSCALESD imm8, m64, xmm, xmm{k}{z} [AVX512F]
// * VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VRNDSCALESD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VRNDSCALESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRNDSCALESD", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VRNDSCALESD", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VRNDSCALESD takes 4 or 5 operands")
    }
    // VRNDSCALESD imm8, m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x0b)
            m.mrsd(lcode(v[3]), addr(v[1]), 8)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0xfd ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALESD imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x0b)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRNDSCALESD")
    }
    return p
}

// VRNDSCALESS performs "Round Scalar Single-Precision Floating-Point Value To Include A Given Number Of Fraction Bits".
//
// Mnemonic : VRNDSCALESS
// Supported forms : (3 forms)
//
// * VRNDSCALESS imm8, m32, xmm, xmm{k}{z} [AVX512F]
// * VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VRNDSCALESS imm8, xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VRNDSCALESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRNDSCALESS", 4, Operands { v0, v1, v2, v3 })
        case 1 : p = self.alloc("VRNDSCALESS", 5, Operands { v0, v1, v2, v3, vv[0] })
        default : panic("instruction VRNDSCALESS takes 4 or 5 operands")
    }
    // VRNDSCALESS imm8, m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
            m.emit(0x0a)
            m.mrsd(lcode(v[3]), addr(v[1]), 4)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
            m.emit(0x7d ^ (hlcode(v[3]) << 3))
            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VRNDSCALESS imm8, xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRNDSCALESS")
    }
    return p
}

// VROUNDPD performs "Round Packed Double Precision Floating-Point Values".
//
// Mnemonic : VROUNDPD
// Supported forms : (4 forms)
//
// * VROUNDPD imm8, xmm, xmm [AVX]
// * VROUNDPD imm8, m128, xmm [AVX]
// * VROUNDPD imm8, ymm, ymm [AVX]
// * VROUNDPD imm8, m256, ymm [AVX]
//
func (self *Program) VROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VROUNDPD", 3, Operands { v0, v1, v2 })
    // VROUNDPD imm8, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VROUNDPD imm8, m128, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VROUNDPD imm8, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x7d)
            m.emit(0x09)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VROUNDPD imm8, m256, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
            m.emit(0x09)
            m.mrsd(lcode(v[2]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VROUNDPD")
    }
    return p
}

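// The legacy VROUND* immediates reuse the SSE4.1 ROUND layout: imm8[1:0]
// picks the rounding mode (00 nearest-even, 01 down, 10 up, 11 truncate),
// imm8[2] set means "use MXCSR.RC instead", and imm8[3] suppresses the
// precision (inexact) exception.
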
// VROUNDPS performs "Round Packed Single Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VROUNDPS
|
|
// Supported forms : (4 forms)
|
|
//
|
|
// * VROUNDPS imm8, xmm, xmm [AVX]
|
|
// * VROUNDPS imm8, m128, xmm [AVX]
|
|
// * VROUNDPS imm8, ymm, ymm [AVX]
|
|
// * VROUNDPS imm8, m256, ymm [AVX]
|
|
//
|
|
func (self *Program) VROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
|
|
p := self.alloc("VROUNDPS", 3, Operands { v0, v1, v2 })
|
|
// VROUNDPS imm8, xmm, xmm
|
|
if isImm8(v0) && isXMM(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x79)
|
|
m.emit(0x08)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VROUNDPS imm8, m128, xmm
|
|
if isImm8(v0) && isM128(v1) && isXMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
|
|
m.emit(0x08)
|
|
m.mrsd(lcode(v[2]), addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VROUNDPS imm8, ymm, ymm
|
|
if isImm8(v0) && isYMM(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x7d)
|
|
m.emit(0x08)
|
|
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VROUNDPS imm8, m256, ymm
|
|
if isImm8(v0) && isM256(v1) && isYMM(v2) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
|
|
m.emit(0x08)
|
|
m.mrsd(lcode(v[2]), addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VROUNDPS")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VROUNDSD performs "Round Scalar Double Precision Floating-Point Values".
|
|
//
|
|
// Mnemonic : VROUNDSD
|
|
// Supported forms : (2 forms)
|
|
//
|
|
// * VROUNDSD imm8, xmm, xmm, xmm [AVX]
|
|
// * VROUNDSD imm8, m64, xmm, xmm [AVX]
|
|
//
|
|
func (self *Program) VROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
|
|
p := self.alloc("VROUNDSD", 4, Operands { v0, v1, v2, v3 })
|
|
// VROUNDSD imm8, xmm, xmm, xmm
|
|
if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.emit(0xc4)
|
|
m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
|
|
m.emit(0x79 ^ (hlcode(v[2]) << 3))
|
|
m.emit(0x0b)
|
|
m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
// VROUNDSD imm8, m64, xmm, xmm
|
|
if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
|
|
self.require(ISA_AVX)
|
|
p.domain = DomainAVX
|
|
p.add(0, func(m *_Encoding, v []interface{}) {
|
|
m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
|
|
m.emit(0x0b)
|
|
m.mrsd(lcode(v[3]), addr(v[1]), 1)
|
|
m.imm1(toImmAny(v[0]))
|
|
})
|
|
}
|
|
if p.len == 0 {
|
|
panic("invalid operands for VROUNDSD")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// VROUNDSS performs "Round Scalar Single Precision Floating-Point Values".
//
// Mnemonic : VROUNDSS
// Supported forms : (2 forms)
//
// * VROUNDSS imm8, xmm, xmm, xmm [AVX]
// * VROUNDSS imm8, m32, xmm, xmm [AVX]
//
func (self *Program) VROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VROUNDSS", 4, Operands { v0, v1, v2, v3 })
    // VROUNDSS imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
            m.emit(0x79 ^ (hlcode(v[2]) << 3))
            m.emit(0x0a)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VROUNDSS imm8, m32, xmm, xmm
    if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0x0a)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VROUNDSS")
    }
    return p
}

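// A sketch of the scalar round (same assumptions as the VROUNDPS sketch
// above). Note the reversed operand order used throughout this package:
// the value being rounded comes first and the destination last, with the
// middle register supplying the untouched upper bits:
//
//     func roundSSExample(p *Program) {
//         // xmm0[31:0] = trunc(xmm1[31:0]); xmm0[127:32] copied from xmm2
//         p.VROUNDSS(0x03, XMM1, XMM2, XMM0)
//     }
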
// VRSQRT14PD performs "Compute Approximate Reciprocals of Square Roots of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VRSQRT14PD
// Supported forms : (6 forms)
//
// * VRSQRT14PD m512/m64bcst, zmm{k}{z} [AVX512F]
// * VRSQRT14PD zmm, zmm{k}{z} [AVX512F]
// * VRSQRT14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VRSQRT14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VRSQRT14PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VRSQRT14PD ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VRSQRT14PD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VRSQRT14PD", 2, Operands { v0, v1 })
    // VRSQRT14PD m512/m64bcst, zmm{k}{z}
    if isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRSQRT14PD zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRT14PD m128/m64bcst, xmm{k}{z}
    if isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VRSQRT14PD m256/m64bcst, ymm{k}{z}
    if isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VRSQRT14PD xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRT14PD ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT14PD")
    }
    return p
}

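// The 64/32/16 literals passed to m.mrsd above are the EVEX disp8*N tuple
// sizes: an 8-bit displacement is stored divided by N and re-multiplied at
// decode. A self-contained sketch of that compression rule (illustrative
// only; the real logic lives inside mrsd):
//
//     // disp8N returns the compressed 8-bit displacement for EVEX
//     // disp8*N addressing, or ok=false when a full disp32 is needed.
//     func disp8N(disp int32, n int32) (v int8, ok bool) {
//         if disp%n != 0 {
//             return 0, false // not a multiple of the tuple size
//         }
//         if q := disp / n; q >= -128 && q <= 127 {
//             return int8(q), true
//         }
//         return 0, false // quotient does not fit in one byte
//     }
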
// VRSQRT14PS performs "Compute Approximate Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VRSQRT14PS
// Supported forms : (6 forms)
//
// * VRSQRT14PS m512/m32bcst, zmm{k}{z} [AVX512F]
// * VRSQRT14PS zmm, zmm{k}{z} [AVX512F]
// * VRSQRT14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VRSQRT14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VRSQRT14PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VRSQRT14PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VRSQRT14PS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VRSQRT14PS", 2, Operands { v0, v1 })
    // VRSQRT14PS m512/m32bcst, zmm{k}{z}
    if isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRSQRT14PS zmm, zmm{k}{z}
    if isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRT14PS m128/m32bcst, xmm{k}{z}
    if isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VRSQRT14PS m256/m32bcst, ymm{k}{z}
    if isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x4e)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VRSQRT14PS xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRT14PS ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x4e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT14PS")
    }
    return p
}

// VRSQRT14SD performs "Compute Approximate Reciprocal of a Square Root of a Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VRSQRT14SD
// Supported forms : (2 forms)
//
// * VRSQRT14SD xmm, xmm, xmm{k}{z} [AVX512F]
// * VRSQRT14SD m64, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VRSQRT14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VRSQRT14SD", 3, Operands { v0, v1, v2 })
    // VRSQRT14SD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRT14SD m64, xmm, xmm{k}{z}
    if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x4f)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT14SD")
    }
    return p
}

// VRSQRT14SS performs "Compute Approximate Reciprocal of a Square Root of a Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VRSQRT14SS
// Supported forms : (2 forms)
//
// * VRSQRT14SS xmm, xmm, xmm{k}{z} [AVX512F]
// * VRSQRT14SS m32, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VRSQRT14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VRSQRT14SS", 3, Operands { v0, v1, v2 })
    // VRSQRT14SS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x4f)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRT14SS m32, xmm, xmm{k}{z}
    if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x4f)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT14SS")
    }
    return p
}

// VRSQRT28PD performs "Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRSQRT28PD
// Supported forms : (3 forms)
//
// * VRSQRT28PD m512/m64bcst, zmm{k}{z} [AVX512ER]
// * VRSQRT28PD {sae}, zmm, zmm{k}{z} [AVX512ER]
// * VRSQRT28PD zmm, zmm{k}{z} [AVX512ER]
//
func (self *Program) VRSQRT28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRSQRT28PD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VRSQRT28PD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VRSQRT28PD takes 2 or 3 operands")
    }
    // VRSQRT28PD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xcc)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRSQRT28PD {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VRSQRT28PD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT28PD")
    }
    return p
}

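// The variadic tail parameter is how the optional {sae} form is selected:
// two operands encode the plain full-width approximation, while a third
// operand shifts everything right and the first argument must satisfy
// isSAE. A sketch (assuming exported ZMM register constants; the spelling
// of the suppress-all-exceptions marker is package-specific and not shown
// here):
//
//     func rsqrt28Example(p *Program) {
//         // plain form: zmm0 ≈ 1/sqrt(zmm1), relative error < 2^-28
//         p.VRSQRT28PD(ZMM1, ZMM0)
//         // the {sae} form would be p.VRSQRT28PD(<sae marker>, ZMM1, ZMM0)
//     }
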
// VRSQRT28PS performs "Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRSQRT28PS
// Supported forms : (3 forms)
//
// * VRSQRT28PS m512/m32bcst, zmm{k}{z} [AVX512ER]
// * VRSQRT28PS {sae}, zmm, zmm{k}{z} [AVX512ER]
// * VRSQRT28PS zmm, zmm{k}{z} [AVX512ER]
//
func (self *Program) VRSQRT28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRSQRT28PS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VRSQRT28PS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VRSQRT28PS takes 2 or 3 operands")
    }
    // VRSQRT28PS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0xcc)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VRSQRT28PS {sae}, zmm, zmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VRSQRT28PS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7d)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0xcc)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT28PS")
    }
    return p
}

// VRSQRT28SD performs "Approximation to the Reciprocal Square Root of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRSQRT28SD
// Supported forms : (3 forms)
//
// * VRSQRT28SD m64, xmm, xmm{k}{z} [AVX512ER]
// * VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
// * VRSQRT28SD xmm, xmm, xmm{k}{z} [AVX512ER]
//
func (self *Program) VRSQRT28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRSQRT28SD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VRSQRT28SD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VRSQRT28SD takes 3 or 4 operands")
    }
    // VRSQRT28SD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xcd)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xcd)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VRSQRT28SD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xcd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT28SD")
    }
    return p
}

// VRSQRT28SS performs "Approximation to the Reciprocal Square Root of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
//
// Mnemonic : VRSQRT28SS
// Supported forms : (3 forms)
//
// * VRSQRT28SS m32, xmm, xmm{k}{z} [AVX512ER]
// * VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
// * VRSQRT28SS xmm, xmm, xmm{k}{z} [AVX512ER]
//
func (self *Program) VRSQRT28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VRSQRT28SS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VRSQRT28SS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VRSQRT28SS takes 3 or 4 operands")
    }
    // VRSQRT28SS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0xcd)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0xcd)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VRSQRT28SS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512ER)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0xcd)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRT28SS")
    }
    return p
}

// VRSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VRSQRTPS
// Supported forms : (4 forms)
//
// * VRSQRTPS xmm, xmm [AVX]
// * VRSQRTPS m128, xmm [AVX]
// * VRSQRTPS ymm, ymm [AVX]
// * VRSQRTPS m256, ymm [AVX]
//
func (self *Program) VRSQRTPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VRSQRTPS", 2, Operands { v0, v1 })
    // VRSQRTPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x52)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRTPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x52)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VRSQRTPS ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x52)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRTPS m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x52)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRTPS")
    }
    return p
}

// VRSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VRSQRTSS
// Supported forms : (2 forms)
//
// * VRSQRTSS xmm, xmm, xmm [AVX]
// * VRSQRTSS m32, xmm, xmm [AVX]
//
func (self *Program) VRSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VRSQRTSS", 3, Operands { v0, v1, v2 })
    // VRSQRTSS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x52)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VRSQRTSS m32, xmm, xmm
    if isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x52)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VRSQRTSS")
    }
    return p
}

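// Packed versus scalar rsqrt in this operand order (same assumptions as
// the earlier sketches; the legacy AVX estimate has relative error no
// worse than 1.5*2^-12):
//
//     func rsqrtExample(p *Program) {
//         p.VRSQRTPS(XMM1, XMM0)       // xmm0[i] ≈ 1/sqrt(xmm1[i])
//         p.VRSQRTSS(XMM1, XMM2, XMM0) // low lane from xmm1, upper bits from xmm2
//     }
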
// VSCALEFPD performs "Scale Packed Double-Precision Floating-Point Values With Double-Precision Floating-Point Values".
//
// Mnemonic : VSCALEFPD
// Supported forms : (7 forms)
//
// * VSCALEFPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VSCALEFPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSCALEFPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VSCALEFPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSCALEFPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSCALEFPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSCALEFPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSCALEFPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSCALEFPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSCALEFPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSCALEFPD takes 3 or 4 operands")
    }
    // VSCALEFPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VSCALEFPD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSCALEFPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSCALEFPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VSCALEFPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSCALEFPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VSCALEFPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCALEFPD")
    }
    return p
}

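// VSCALEF computes dst = src1 * 2^floor(src2) per lane, which is the
// exponent-adjustment step of libm-style exp() kernels. In this package's
// reversed operand order that reads (same assumptions as the earlier
// sketches):
//
//     func scalefExample(p *Program) {
//         // zmm0 = zmm2 * 2^floor(zmm1), lane by lane
//         p.VSCALEFPD(ZMM1, ZMM2, ZMM0)
//         // the {er} form instead takes four operands with a rounding-mode
//         // marker first, per the supported-forms list above
//     }
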
// VSCALEFPS performs "Scale Packed Single-Precision Floating-Point Values With Single-Precision Floating-Point Values".
//
// Mnemonic : VSCALEFPS
// Supported forms : (7 forms)
//
// * VSCALEFPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VSCALEFPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSCALEFPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VSCALEFPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSCALEFPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSCALEFPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSCALEFPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSCALEFPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSCALEFPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSCALEFPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSCALEFPS takes 3 or 4 operands")
    }
    // VSCALEFPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VSCALEFPS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSCALEFPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSCALEFPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VSCALEFPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSCALEFPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x2c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VSCALEFPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x2c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCALEFPS")
    }
    return p
}

// VSCALEFSD performs "Scale Scalar Double-Precision Floating-Point Value With a Double-Precision Floating-Point Value".
//
// Mnemonic : VSCALEFSD
// Supported forms : (3 forms)
//
// * VSCALEFSD m64, xmm, xmm{k}{z} [AVX512F]
// * VSCALEFSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VSCALEFSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VSCALEFSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSCALEFSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSCALEFSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSCALEFSD takes 3 or 4 operands")
    }
    // VSCALEFSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VSCALEFSD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSCALEFSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCALEFSD")
    }
    return p
}

// VSCALEFSS performs "Scale Scalar Single-Precision Floating-Point Value With a Single-Precision Floating-Point Value".
//
// Mnemonic : VSCALEFSS
// Supported forms : (3 forms)
//
// * VSCALEFSS m32, xmm, xmm{k}{z} [AVX512F]
// * VSCALEFSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VSCALEFSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VSCALEFSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSCALEFSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSCALEFSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSCALEFSS takes 3 or 4 operands")
    }
    // VSCALEFSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x2d)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VSCALEFSS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSCALEFSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7d ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x2d)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCALEFSS")
    }
    return p
}

// VSCATTERDPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Doubleword Indices".
//
// Mnemonic : VSCATTERDPD
// Supported forms : (3 forms)
//
// * VSCATTERDPD zmm, vm32y{k} [AVX512F]
// * VSCATTERDPD xmm, vm32x{k} [AVX512F,AVX512VL]
// * VSCATTERDPD ymm, vm32x{k} [AVX512F,AVX512VL]
//
func (self *Program) VSCATTERDPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VSCATTERDPD", 2, Operands { v0, v1 })
    // VSCATTERDPD zmm, vm32y{k}
    if isZMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa2)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VSCATTERDPD xmm, vm32x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa2)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VSCATTERDPD ymm, vm32x{k}
    if isEVEXYMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa2)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERDPD")
    }
    return p
}

// VSCATTERDPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Doubleword Indices".
//
// Mnemonic : VSCATTERDPS
// Supported forms : (3 forms)
//
// * VSCATTERDPS zmm, vm32z{k} [AVX512F]
// * VSCATTERDPS xmm, vm32x{k} [AVX512F,AVX512VL]
// * VSCATTERDPS ymm, vm32y{k} [AVX512F,AVX512VL]
//
func (self *Program) VSCATTERDPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VSCATTERDPS", 2, Operands { v0, v1 })
    // VSCATTERDPS zmm, vm32z{k}
    if isZMM(v0) && isVMZk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa2)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VSCATTERDPS xmm, vm32x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa2)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VSCATTERDPS ymm, vm32y{k}
    if isEVEXYMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa2)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERDPS")
    }
    return p
}

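// Architecturally, a scatter stores each selected lane to
// base + index[i]*scale and clears that lane's mask bit as it completes,
// which is what makes the instruction restartable after a fault. A
// self-contained model of VSCATTERDPS at element granularity
// (illustrative only, not this package's API):
//
//     func scatterDPSModel(mem []float32, idx [16]int32, k *uint16, src [16]float32) {
//         for i := uint(0); i < 16; i++ {
//             if *k&(1<<i) != 0 {
//                 mem[idx[i]] = src[i] // stand-in for base + index*scale
//                 *k &^= 1 << i        // completed lanes drop out of the mask
//             }
//         }
//     }
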
// VSCATTERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF0DPD
// Supported forms : (1 form)
//
// * VSCATTERPF0DPD vm32y{k} [AVX512PF]
//
func (self *Program) VSCATTERPF0DPD(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF0DPD", 1, Operands { v0 })
    // VSCATTERPF0DPD vm32y{k}
    if isVMYk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(5, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF0DPD")
    }
    return p
}

// VSCATTERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF0DPS
// Supported forms : (1 form)
//
// * VSCATTERPF0DPS vm32z{k} [AVX512PF]
//
func (self *Program) VSCATTERPF0DPS(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF0DPS", 1, Operands { v0 })
    // VSCATTERPF0DPS vm32z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(5, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF0DPS")
    }
    return p
}

// VSCATTERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF0QPD
// Supported forms : (1 form)
//
// * VSCATTERPF0QPD vm64z{k} [AVX512PF]
//
func (self *Program) VSCATTERPF0QPD(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF0QPD", 1, Operands { v0 })
    // VSCATTERPF0QPD vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(5, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF0QPD")
    }
    return p
}

// VSCATTERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF0QPS
// Supported forms : (1 form)
//
// * VSCATTERPF0QPS vm64z{k} [AVX512PF]
//
func (self *Program) VSCATTERPF0QPS(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF0QPS", 1, Operands { v0 })
    // VSCATTERPF0QPS vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(5, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF0QPS")
    }
    return p
}

// VSCATTERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF1DPD
// Supported forms : (1 form)
//
// * VSCATTERPF1DPD vm32y{k} [AVX512PF]
//
func (self *Program) VSCATTERPF1DPD(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF1DPD", 1, Operands { v0 })
    // VSCATTERPF1DPD vm32y{k}
    if isVMYk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(6, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF1DPD")
    }
    return p
}

// VSCATTERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF1DPS
// Supported forms : (1 form)
//
// * VSCATTERPF1DPS vm32z{k} [AVX512PF]
//
func (self *Program) VSCATTERPF1DPS(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF1DPS", 1, Operands { v0 })
    // VSCATTERPF1DPS vm32z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc6)
            m.mrsd(6, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF1DPS")
    }
    return p
}

// VSCATTERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF1QPD
// Supported forms : (1 form)
//
// * VSCATTERPF1QPD vm64z{k} [AVX512PF]
//
func (self *Program) VSCATTERPF1QPD(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF1QPD", 1, Operands { v0 })
    // VSCATTERPF1QPD vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(6, addr(v[0]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF1QPD")
    }
    return p
}

// VSCATTERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write".
//
// Mnemonic : VSCATTERPF1QPS
// Supported forms : (1 form)
//
// * VSCATTERPF1QPS vm64z{k} [AVX512PF]
//
func (self *Program) VSCATTERPF1QPS(v0 interface{}) *Instruction {
    p := self.alloc("VSCATTERPF1QPS", 1, Operands { v0 })
    // VSCATTERPF1QPS vm64z{k}
    if isVMZk(v0) {
        self.require(ISA_AVX512PF)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
            m.emit(0xc7)
            m.mrsd(6, addr(v[0]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERPF1QPS")
    }
    return p
}

// VSCATTERQPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Quadword Indices".
//
// Mnemonic : VSCATTERQPD
// Supported forms : (3 forms)
//
// * VSCATTERQPD zmm, vm64z{k} [AVX512F]
// * VSCATTERQPD xmm, vm64x{k} [AVX512F,AVX512VL]
// * VSCATTERQPD ymm, vm64y{k} [AVX512F,AVX512VL]
//
func (self *Program) VSCATTERQPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VSCATTERQPD", 2, Operands { v0, v1 })
    // VSCATTERQPD zmm, vm64z{k}
    if isZMM(v0) && isVMZk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VSCATTERQPD xmm, vm64x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    // VSCATTERQPD ymm, vm64y{k}
    if isEVEXYMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 8)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERQPD")
    }
    return p
}

// VSCATTERQPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Quadword Indices".
//
// Mnemonic : VSCATTERQPS
// Supported forms : (3 forms)
//
// * VSCATTERQPS ymm, vm64z{k} [AVX512F]
// * VSCATTERQPS xmm, vm64x{k} [AVX512F,AVX512VL]
// * VSCATTERQPS xmm, vm64y{k} [AVX512F,AVX512VL]
//
func (self *Program) VSCATTERQPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VSCATTERQPS", 2, Operands { v0, v1 })
    // VSCATTERQPS ymm, vm64z{k}
    if isEVEXYMM(v0) && isVMZk(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VSCATTERQPS xmm, vm64x{k}
    if isEVEXXMM(v0) && isVMXk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    // VSCATTERQPS xmm, vm64y{k}
    if isEVEXXMM(v0) && isVMYk(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
            m.emit(0xa3)
            m.mrsd(lcode(v[0]), addr(v[1]), 4)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSCATTERQPS")
    }
    return p
}

// VSHUFF32X4 performs "Shuffle 128-Bit Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VSHUFF32X4
// Supported forms : (4 forms)
//
// * VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSHUFF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VSHUFF32X4", 4, Operands { v0, v1, v2, v3 })
    // VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x23)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x23)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSHUFF32X4")
    }
    return p
}

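// Each 2-bit field of imm8 selects one 128-bit block: the destination's
// lower lanes are picked from the register (vvvv) source, its upper lanes
// from the first (r/m) operand. A sketch under the same assumptions as the
// earlier examples:
//
//     func shuff32x4Example(p *Program) {
//         // imm8 = 0x4e = 0b01_00_11_10: zmm0 = {zmm2 lanes 2,3, zmm1 lanes 0,1},
//         // i.e. the high 256 bits of zmm2 followed by the low 256 bits of zmm1
//         p.VSHUFF32X4(0x4e, ZMM1, ZMM2, ZMM0)
//     }
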
// VSHUFF64X2 performs "Shuffle 128-Bit Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VSHUFF64X2
// Supported forms : (4 forms)
//
// * VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSHUFF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VSHUFF64X2", 4, Operands { v0, v1, v2, v3 })
    // VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x23)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x23)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x23)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSHUFF64X2")
    }
    return p
}

// VSHUFI32X4 performs "Shuffle 128-Bit Packed Doubleword Integer Values".
//
// Mnemonic : VSHUFI32X4
// Supported forms : (4 forms)
//
// * VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSHUFI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VSHUFI32X4", 4, Operands { v0, v1, v2, v3 })
    // VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x43)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x43)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7d ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSHUFI32X4")
    }
    return p
}

// VSHUFI64X2 performs "Shuffle 128-Bit Packed Quadword Integer Values".
//
// Mnemonic : VSHUFI64X2
// Supported forms : (4 forms)
//
// * VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSHUFI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VSHUFI64X2", 4, Operands { v0, v1, v2, v3 })
    // VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x43)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0x43)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0x43)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSHUFI64X2")
    }
    return p
}

// VSHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VSHUFPD
// Supported forms : (10 forms)
//
// * VSHUFPD imm8, xmm, xmm, xmm [AVX]
// * VSHUFPD imm8, m128, xmm, xmm [AVX]
// * VSHUFPD imm8, ymm, ymm, ymm [AVX]
// * VSHUFPD imm8, m256, ymm, ymm [AVX]
// * VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VSHUFPD imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSHUFPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSHUFPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSHUFPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VSHUFPD", 4, Operands { v0, v1, v2, v3 })
    // VSHUFPD imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z}
    if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPD imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSHUFPD")
    }
    return p
}

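// NOTE: plain XMM/YMM operands (isXMM/isYMM) select the 2-byte-VEX AVX
// encodings above, while features only representable under EVEX select the
// 4-byte EVEX forms: extended registers (e.g. XMM16 and up, matched by
// isEVEXXMM), {k}/{z} opmask suffixes (isXMMkz), and broadcast memory
// operands (isM128M64bcst and friends). The same VSHUFPD call therefore
// encodes differently depending purely on the operand kinds passed in.
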
// VSHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VSHUFPS
// Supported forms : (10 forms)
//
// * VSHUFPS imm8, xmm, xmm, xmm [AVX]
// * VSHUFPS imm8, m128, xmm, xmm [AVX]
// * VSHUFPS imm8, ymm, ymm, ymm [AVX]
// * VSHUFPS imm8, m256, ymm, ymm [AVX]
// * VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VSHUFPS imm8, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSHUFPS imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSHUFPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSHUFPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
    p := self.alloc("VSHUFPS", 4, Operands { v0, v1, v2, v3 })
    // VSHUFPS imm8, xmm, xmm, xmm
    if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, m128, xmm, xmm
    if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, ymm, ymm, ymm
    if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[3]), v[1], hlcode(v[2]))
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, m256, ymm, ymm
    if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z}
    if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 64)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, zmm, zmm, zmm{k}{z}
    if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z}
    if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 16)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, xmm, xmm, xmm{k}{z}
    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z}
    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
            m.emit(0xc6)
            m.mrsd(lcode(v[3]), addr(v[1]), 32)
            m.imm1(toImmAny(v[0]))
        })
    }
    // VSHUFPS imm8, ymm, ymm, ymm{k}{z}
    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
            m.emit(0xc6)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSHUFPS")
    }
    return p
}

// VSQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VSQRTPD
// Supported forms : (11 forms)
//
// * VSQRTPD xmm, xmm [AVX]
// * VSQRTPD m128, xmm [AVX]
// * VSQRTPD ymm, ymm [AVX]
// * VSQRTPD m256, ymm [AVX]
// * VSQRTPD m512/m64bcst, zmm{k}{z} [AVX512F]
// * VSQRTPD {er}, zmm, zmm{k}{z} [AVX512F]
// * VSQRTPD zmm, zmm{k}{z} [AVX512F]
// * VSQRTPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VSQRTPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VSQRTPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSQRTPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSQRTPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSQRTPD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VSQRTPD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VSQRTPD takes 2 or 3 operands")
    }
    // VSQRTPD xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPD m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VSQRTPD ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), v[0], 0)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPD m256, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VSQRTPD m512/m64bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VSQRTPD {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VSQRTPD zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPD m128/m64bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VSQRTPD m256/m64bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VSQRTPD xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPD ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSQRTPD")
    }
    return p
}

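// A minimal usage sketch for the variadic VSQRTPD (names are illustrative;
// the rounding-mode constant is hypothetical and must be whatever this
// package accepts for an {er} operand matched by isER):
//
//     p.VSQRTPD(ZMM1, ZMM0)            // 2 operands: plain zmm form
//     p.VSQRTPD(RN_SAE, ZMM1, ZMM0)    // 3 operands: {er} form with embedded rounding
//
// The trailing variadic parameter is how one Go signature covers both the
// 2-operand and the 3-operand {er} forms of this instruction.
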
// VSQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VSQRTPS
// Supported forms : (11 forms)
//
// * VSQRTPS xmm, xmm [AVX]
// * VSQRTPS m128, xmm [AVX]
// * VSQRTPS ymm, ymm [AVX]
// * VSQRTPS m256, ymm [AVX]
// * VSQRTPS m512/m32bcst, zmm{k}{z} [AVX512F]
// * VSQRTPS {er}, zmm, zmm{k}{z} [AVX512F]
// * VSQRTPS zmm, zmm{k}{z} [AVX512F]
// * VSQRTPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
// * VSQRTPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
// * VSQRTPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSQRTPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSQRTPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSQRTPS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VSQRTPS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VSQRTPS takes 2 or 3 operands")
    }
    // VSQRTPS xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPS m128, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VSQRTPS ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), v[0], 0)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPS m256, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VSQRTPS m512/m32bcst, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 64)
        })
    }
    // VSQRTPS {er}, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VSQRTPS zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPS m128/m32bcst, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 16)
        })
    }
    // VSQRTPS m256/m32bcst, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
            m.emit(0x51)
            m.mrsd(lcode(v[1]), addr(v[0]), 32)
        })
    }
    // VSQRTPS xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTPS ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSQRTPS")
    }
    return p
}

// VSQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value".
//
// Mnemonic : VSQRTSD
// Supported forms : (5 forms)
//
// * VSQRTSD xmm, xmm, xmm [AVX]
// * VSQRTSD m64, xmm, xmm [AVX]
// * VSQRTSD m64, xmm, xmm{k}{z} [AVX512F]
// * VSQRTSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VSQRTSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VSQRTSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSQRTSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSQRTSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSQRTSD takes 3 or 4 operands")
    }
    // VSQRTSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x51)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSQRTSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VSQRTSD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSQRTSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSQRTSD")
    }
    return p
}

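// NOTE: the last argument to m.mrsd is the EVEX disp8*N compression factor:
// memory displacements that are a multiple of N can be stored as a single
// scaled byte. It tracks the memory operand's size, which is why the m64
// form above passes 8, the xmm/ymm/zmm-sized loads elsewhere pass 16/32/64,
// and the VEX-encoded forms pass 1 (VEX has no displacement compression).
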
// VSQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value".
//
// Mnemonic : VSQRTSS
// Supported forms : (5 forms)
//
// * VSQRTSS xmm, xmm, xmm [AVX]
// * VSQRTSS m32, xmm, xmm [AVX]
// * VSQRTSS m32, xmm, xmm{k}{z} [AVX512F]
// * VSQRTSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VSQRTSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSQRTSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSQRTSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSQRTSS takes 3 or 4 operands")
    }
    // VSQRTSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSQRTSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x51)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSQRTSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x51)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VSQRTSS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSQRTSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x51)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSQRTSS")
    }
    return p
}

// VSTMXCSR performs "Store MXCSR Register State".
//
// Mnemonic : VSTMXCSR
// Supported forms : (1 form)
//
// * VSTMXCSR m32 [AVX]
//
func (self *Program) VSTMXCSR(v0 interface{}) *Instruction {
    p := self.alloc("VSTMXCSR", 1, Operands { v0 })
    // VSTMXCSR m32
    if isM32(v0) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, 0, addr(v[0]), 0)
            m.emit(0xae)
            m.mrsd(3, addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSTMXCSR")
    }
    return p
}

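// NOTE: VSTMXCSR has no register destination, so the ModRM reg field carries
// the opcode extension /3 instead: that is the literal 3 passed to m.mrsd
// above, matching the VEX.0F AE /3 encoding of this instruction.
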
// VSUBPD performs "Subtract Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VSUBPD
// Supported forms : (11 forms)
//
// * VSUBPD xmm, xmm, xmm [AVX]
// * VSUBPD m128, xmm, xmm [AVX]
// * VSUBPD ymm, ymm, ymm [AVX]
// * VSUBPD m256, ymm, ymm [AVX]
// * VSUBPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VSUBPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSUBPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VSUBPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSUBPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSUBPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSUBPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSUBPD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSUBPD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSUBPD takes 3 or 4 operands")
    }
    // VSUBPD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPD m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSUBPD ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPD m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSUBPD m512/m64bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VSUBPD {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xfd ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSUBPD zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPD m128/m64bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VSUBPD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPD m256/m64bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VSUBPD ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSUBPD")
    }
    return p
}

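// NOTE: for the m512/m64bcst-style memory forms above, bcode(v[0]) feeds the
// EVEX broadcast bit (EVEX.b): the same encoder emits either a full-width
// load or a 1-to-N broadcast of a single 64-bit element, depending on
// whether the memory operand was flagged as a broadcast when it was built.
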
// VSUBPS performs "Subtract Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VSUBPS
// Supported forms : (11 forms)
//
// * VSUBPS xmm, xmm, xmm [AVX]
// * VSUBPS m128, xmm, xmm [AVX]
// * VSUBPS ymm, ymm, ymm [AVX]
// * VSUBPS m256, ymm, ymm [AVX]
// * VSUBPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VSUBPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
// * VSUBPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VSUBPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSUBPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VSUBPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VSUBPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSUBPS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSUBPS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSUBPS takes 3 or 4 operands")
    }
    // VSUBPS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPS m128, xmm, xmm
    if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSUBPS ymm, ymm, ymm
    if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPS m256, ymm, ymm
    if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSUBPS m512/m32bcst, zmm, zmm{k}{z}
    if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VSUBPS {er}, zmm, zmm, zmm{k}{z}
    if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7c ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSUBPS zmm, zmm, zmm{k}{z}
    if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPS m128/m32bcst, xmm, xmm{k}{z}
    if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VSUBPS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBPS m256/m32bcst, ymm, ymm{k}{z}
    if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VSUBPS ymm, ymm, ymm{k}{z}
    if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSUBPS")
    }
    return p
}

// VSUBSD performs "Subtract Scalar Double-Precision Floating-Point Values".
//
// Mnemonic : VSUBSD
// Supported forms : (5 forms)
//
// * VSUBSD xmm, xmm, xmm [AVX]
// * VSUBSD m64, xmm, xmm [AVX]
// * VSUBSD m64, xmm, xmm{k}{z} [AVX512F]
// * VSUBSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VSUBSD xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSUBSD", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSUBSD", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSUBSD takes 3 or 4 operands")
    }
    // VSUBSD xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBSD m64, xmm, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSUBSD m64, xmm, xmm{k}{z}
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 8)
        })
    }
    // VSUBSD {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0xff ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSUBSD xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xff ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSUBSD")
    }
    return p
}

// VSUBSS performs "Subtract Scalar Single-Precision Floating-Point Values".
//
// Mnemonic : VSUBSS
// Supported forms : (5 forms)
//
// * VSUBSS xmm, xmm, xmm [AVX]
// * VSUBSS m32, xmm, xmm [AVX]
// * VSUBSS m32, xmm, xmm{k}{z} [AVX512F]
// * VSUBSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
// * VSUBSS xmm, xmm, xmm{k}{z} [AVX512F]
//
func (self *Program) VSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VSUBSS", 3, Operands { v0, v1, v2 })
        case 1 : p = self.alloc("VSUBSS", 4, Operands { v0, v1, v2, vv[0] })
        default : panic("instruction VSUBSS takes 3 or 4 operands")
    }
    // VSUBSS xmm, xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VSUBSS m32, xmm, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VSUBSS m32, xmm, xmm{k}{z}
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
            m.emit(0x5c)
            m.mrsd(lcode(v[2]), addr(v[0]), 4)
        })
    }
    // VSUBSS {er}, xmm, xmm, xmm{k}{z}
    if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
            m.emit(0x7e ^ (hlcode(v[2]) << 3))
            m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
        })
    }
    // VSUBSS xmm, xmm, xmm{k}{z}
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7e ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x5c)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VSUBSS")
    }
    return p
}

// VTESTPD performs "Packed Double-Precision Floating-Point Bit Test".
//
// Mnemonic : VTESTPD
// Supported forms : (4 forms)
//
// * VTESTPD xmm, xmm [AVX]
// * VTESTPD m128, xmm [AVX]
// * VTESTPD ymm, ymm [AVX]
// * VTESTPD m256, ymm [AVX]
//
func (self *Program) VTESTPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VTESTPD", 2, Operands { v0, v1 })
    // VTESTPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VTESTPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VTESTPD ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x0f)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VTESTPD m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x0f)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VTESTPD")
    }
    return p
}

// VTESTPS performs "Packed Single-Precision Floating-Point Bit Test".
//
// Mnemonic : VTESTPS
// Supported forms : (4 forms)
//
// * VTESTPS xmm, xmm [AVX]
// * VTESTPS m128, xmm [AVX]
// * VTESTPS ymm, ymm [AVX]
// * VTESTPS m256, ymm [AVX]
//
func (self *Program) VTESTPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("VTESTPS", 2, Operands { v0, v1 })
    // VTESTPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x79)
            m.emit(0x0e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VTESTPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x0e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VTESTPS ymm, ymm
    if isYMM(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0xc4)
            m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
            m.emit(0x7d)
            m.emit(0x0e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VTESTPS m256, ymm
    if isM256(v0) && isYMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x0e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for VTESTPS")
    }
    return p
}

// VUCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : VUCOMISD
// Supported forms : (5 forms)
//
// * VUCOMISD xmm, xmm [AVX]
// * VUCOMISD m64, xmm [AVX]
// * VUCOMISD m64, xmm [AVX512F]
// * VUCOMISD {sae}, xmm, xmm [AVX512F]
// * VUCOMISD xmm, xmm [AVX512F]
//
func (self *Program) VUCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VUCOMISD", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VUCOMISD", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VUCOMISD takes 2 or 3 operands")
    }
    // VUCOMISD xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), v[0], 0)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VUCOMISD m64, xmm
    if len(vv) == 0 && isM64(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VUCOMISD m64, xmm
    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2e)
            m.mrsd(lcode(v[1]), addr(v[0]), 8)
        })
    }
    // VUCOMISD {sae}, xmm, xmm
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd)
            m.emit(0x18)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VUCOMISD xmm, xmm
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0xfd)
            m.emit(0x48)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VUCOMISD")
    }
    return p
}

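// A minimal usage sketch for the {sae} form of VUCOMISD (names are
// illustrative; the SAE constant is hypothetical and must be whatever this
// package accepts for an operand matched by isSAE). The {sae} form performs
// the compare with floating-point exceptions suppressed while still setting
// EFLAGS:
//
//     p.VUCOMISD(SAE, XMM1, XMM0)
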
// VUCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS".
//
// Mnemonic : VUCOMISS
// Supported forms : (5 forms)
//
// * VUCOMISS xmm, xmm [AVX]
// * VUCOMISS m32, xmm [AVX]
// * VUCOMISS m32, xmm [AVX512F]
// * VUCOMISS {sae}, xmm, xmm [AVX512F]
// * VUCOMISS xmm, xmm [AVX512F]
//
func (self *Program) VUCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
    var p *Instruction
    switch len(vv) {
        case 0 : p = self.alloc("VUCOMISS", 2, Operands { v0, v1 })
        case 1 : p = self.alloc("VUCOMISS", 3, Operands { v0, v1, vv[0] })
        default : panic("instruction VUCOMISS takes 2 or 3 operands")
    }
    // VUCOMISS xmm, xmm
    if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), v[0], 0)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // VUCOMISS m32, xmm
    if len(vv) == 0 && isM32(v0) && isXMM(v1) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
            m.emit(0x2e)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // VUCOMISS m32, xmm
    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
            m.emit(0x2e)
            m.mrsd(lcode(v[1]), addr(v[0]), 4)
        })
    }
    // VUCOMISS {sae}, xmm, xmm
    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c)
            m.emit(0x18)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
        })
    }
    // VUCOMISS xmm, xmm
    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
            m.emit(0x7c)
            m.emit(0x48)
            m.emit(0x2e)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VUCOMISS")
    }
    return p
}

// VUNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VUNPCKHPD
// Supported forms : (10 forms)
//
// * VUNPCKHPD xmm, xmm, xmm [AVX]
// * VUNPCKHPD m128, xmm, xmm [AVX]
// * VUNPCKHPD ymm, ymm, ymm [AVX]
// * VUNPCKHPD m256, ymm, ymm [AVX]
// * VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKHPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKHPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKHPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VUNPCKHPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VUNPCKHPD", 3, Operands { v0, v1, v2 })
    // VUNPCKHPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKHPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VUNPCKHPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VUNPCKHPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VUNPCKHPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VUNPCKHPD")
    }
    return p
}

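// For reference, a minimal sketch of what the xmm form computes (illustrative,
// not generated): with each register holding two float64 lanes, the result is
// dst = { src1.hi, src2.hi }, and in this package's operand order that is
// VUNPCKHPD(src2, src1, dst). Assuming p and XMM0..XMM2 are in scope:
//
//     p.VUNPCKHPD(XMM2, XMM1, XMM0) // XMM0 = { lo: XMM1.hi, hi: XMM2.hi }
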
// VUNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VUNPCKHPS
// Supported forms : (10 forms)
//
// * VUNPCKHPS xmm, xmm, xmm [AVX]
// * VUNPCKHPS m128, xmm, xmm [AVX]
// * VUNPCKHPS ymm, ymm, ymm [AVX]
// * VUNPCKHPS m256, ymm, ymm [AVX]
// * VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKHPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKHPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKHPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VUNPCKHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VUNPCKHPS", 3, Operands { v0, v1, v2 })
    // VUNPCKHPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKHPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VUNPCKHPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VUNPCKHPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x15)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VUNPCKHPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x15)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VUNPCKHPS")
    }
    return p
}

// VUNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values".
//
// Mnemonic : VUNPCKLPD
// Supported forms : (10 forms)
//
// * VUNPCKLPD xmm, xmm, xmm [AVX]
// * VUNPCKLPD m128, xmm, xmm [AVX]
// * VUNPCKLPD ymm, ymm, ymm [AVX]
// * VUNPCKLPD m256, ymm, ymm [AVX]
// * VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKLPD zmm, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKLPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKLPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VUNPCKLPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VUNPCKLPD", 3, Operands { v0, v1, v2 })
    // VUNPCKLPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKLPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VUNPCKLPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VUNPCKLPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VUNPCKLPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VUNPCKLPD")
    }
    return p
}

// VUNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values".
//
// Mnemonic : VUNPCKLPS
// Supported forms : (10 forms)
//
// * VUNPCKLPS xmm, xmm, xmm [AVX]
// * VUNPCKLPS m128, xmm, xmm [AVX]
// * VUNPCKLPS ymm, ymm, ymm [AVX]
// * VUNPCKLPS m256, ymm, ymm [AVX]
// * VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKLPS zmm, zmm, zmm{k}{z} [AVX512F]
// * VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKLPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
// * VUNPCKLPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
//
func (self *Program) VUNPCKLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VUNPCKLPS", 3, Operands { v0, v1, v2 })
    // VUNPCKLPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKLPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VUNPCKLPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VUNPCKLPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x14)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VUNPCKLPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512F)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x14)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VUNPCKLPS")
    }
    return p
}

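// Sketch (illustrative, not generated): the unpack-low single-precision form
// interleaves the two low float32 lanes of each source, a common building
// block for 4x4 matrix transposes. Assuming p and XMM0..XMM2 from this
// package:
//
//     p.VUNPCKLPS(XMM2, XMM1, XMM0) // XMM0 = { XMM1[0], XMM2[0], XMM1[1], XMM2[1] }
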
// VXORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values".
//
// Mnemonic : VXORPD
// Supported forms : (10 forms)
//
// * VXORPD xmm, xmm, xmm [AVX]
// * VXORPD m128, xmm, xmm [AVX]
// * VXORPD ymm, ymm, ymm [AVX]
// * VXORPD m256, ymm, ymm [AVX]
// * VXORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VXORPD zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VXORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VXORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VXORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VXORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VXORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VXORPD", 3, Operands { v0, v1, v2 })
    // VXORPD xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPD m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VXORPD ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPD m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VXORPD m512/m64bcst, zmm, zmm{k}{z}
    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VXORPD zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPD m128/m64bcst, xmm, xmm{k}{z}
    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VXORPD xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPD m256/m64bcst, ymm, ymm{k}{z}
    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VXORPD ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0xfd ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VXORPD")
    }
    return p
}

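// Sketch (illustrative, not generated): XOR-ing a register with itself is the
// standard dependency-breaking way to zero a vector register. Assuming p and
// the YMM0 register constant from this package:
//
//     p.VXORPD(YMM0, YMM0, YMM0) // YMM0 = 0.0 in all lanes, no input dependency
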
// VXORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values".
//
// Mnemonic : VXORPS
// Supported forms : (10 forms)
//
// * VXORPS xmm, xmm, xmm [AVX]
// * VXORPS m128, xmm, xmm [AVX]
// * VXORPS ymm, ymm, ymm [AVX]
// * VXORPS m256, ymm, ymm [AVX]
// * VXORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
// * VXORPS zmm, zmm, zmm{k}{z} [AVX512DQ]
// * VXORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VXORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
// * VXORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
// * VXORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
//
func (self *Program) VXORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
    p := self.alloc("VXORPS", 3, Operands { v0, v1, v2 })
    // VXORPS xmm, xmm, xmm
    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPS m128, xmm, xmm
    if isM128(v0) && isXMM(v1) && isXMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VXORPS ymm, ymm, ymm
    if isYMM(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPS m256, ymm, ymm
    if isM256(v0) && isYMM(v1) && isYMM(v2) {
        self.require(ISA_AVX)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 1)
        })
    }
    // VXORPS m512/m32bcst, zmm, zmm{k}{z}
    if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 64)
        })
    }
    // VXORPS zmm, zmm, zmm{k}{z}
    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
        self.require(ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPS m128/m32bcst, xmm, xmm{k}{z}
    if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 16)
        })
    }
    // VXORPS xmm, xmm, xmm{k}{z}
    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    // VXORPS m256/m32bcst, ymm, ymm{k}{z}
    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
            m.emit(0x57)
            m.mrsd(lcode(v[2]), addr(v[0]), 32)
        })
    }
    // VXORPS ymm, ymm, ymm{k}{z}
    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
        self.require(ISA_AVX512VL | ISA_AVX512DQ)
        p.domain = DomainAVX
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x62)
            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
            m.emit(0x7c ^ (hlcode(v[1]) << 3))
            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
        })
    }
    if p.len == 0 {
        panic("invalid operands for VXORPS")
    }
    return p
}

// VZEROALL performs "Zero All YMM Registers".
//
// Mnemonic : VZEROALL
// Supported forms : (1 form)
//
// * VZEROALL [AVX]
//
func (self *Program) VZEROALL() *Instruction {
    p := self.alloc("VZEROALL", 0, Operands { })
    // VZEROALL
    self.require(ISA_AVX)
    p.domain = DomainAVX
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.vex2(4, 0, nil, 0)
        m.emit(0x77)
    })
    return p
}

// VZEROUPPER performs "Zero Upper Bits of YMM Registers".
//
// Mnemonic : VZEROUPPER
// Supported forms : (1 form)
//
// * VZEROUPPER [AVX]
//
func (self *Program) VZEROUPPER() *Instruction {
    p := self.alloc("VZEROUPPER", 0, Operands { })
    // VZEROUPPER
    self.require(ISA_AVX)
    p.domain = DomainAVX
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.vex2(0, 0, nil, 0)
        m.emit(0x77)
    })
    return p
}

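// Sketch (illustrative, not generated): VZEROUPPER is conventionally emitted
// when leaving VEX-encoded code, before executing legacy SSE code or crossing
// an ABI boundary, to avoid the AVX-SSE transition penalty on some
// microarchitectures. Assuming p from this package:
//
//     p.VZEROUPPER() // clear bits 255:128 of all YMM registers
//     // ... legacy SSE code or a call boundary follows
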
// XADDB performs "Exchange and Add".
//
// Mnemonic : XADD
// Supported forms : (2 forms)
//
// * XADDB r8, r8
// * XADDB r8, m8
//
func (self *Program) XADDB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XADDB", 2, Operands { v0, v1 })
    // XADDB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x0f)
            m.emit(0xc0)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // XADDB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x0f)
            m.emit(0xc0)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XADDB")
    }
    return p
}

// XADDL performs "Exchange and Add".
//
// Mnemonic : XADD
// Supported forms : (2 forms)
//
// * XADDL r32, r32
// * XADDL r32, m32
//
func (self *Program) XADDL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XADDL", 2, Operands { v0, v1 })
    // XADDL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc1)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // XADDL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XADDL")
    }
    return p
}

// XADDQ performs "Exchange and Add".
//
// Mnemonic : XADD
// Supported forms : (2 forms)
//
// * XADDQ r64, r64
// * XADDQ r64, m64
//
func (self *Program) XADDQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XADDQ", 2, Operands { v0, v1 })
    // XADDQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x0f)
            m.emit(0xc1)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // XADDQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x0f)
            m.emit(0xc1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XADDQ")
    }
    return p
}

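// Sketch (illustrative, not generated): XADD writes the old destination value
// back into the source register, so "LOCK XADD" implements an atomic
// fetch-and-add. Whether this package exposes a LOCK prefix helper is an
// assumption, as is the Ptr memory-operand constructor used below:
//
//     p.XADDQ(RAX, Ptr(RDI, 0)) // mem += RAX; RAX = old mem value (needs LOCK for atomicity)
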
// XADDW performs "Exchange and Add".
//
// Mnemonic : XADD
// Supported forms : (2 forms)
//
// * XADDW r16, r16
// * XADDW r16, m16
//
func (self *Program) XADDW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XADDW", 2, Operands { v0, v1 })
    // XADDW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x0f)
            m.emit(0xc1)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
    }
    // XADDW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x0f)
            m.emit(0xc1)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XADDW")
    }
    return p
}

// XCHGB performs "Exchange Register/Memory with Register".
//
// Mnemonic : XCHG
// Supported forms : (3 forms)
//
// * XCHGB r8, r8
// * XCHGB m8, r8
// * XCHGB r8, m8
//
func (self *Program) XCHGB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XCHGB", 2, Operands { v0, v1 })
    // XCHGB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x86)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x86)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XCHGB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x86)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XCHGB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x86)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XCHGB")
    }
    return p
}

// XCHGL performs "Exchange Register/Memory with Register".
//
// Mnemonic : XCHG
// Supported forms : (5 forms)
//
// * XCHGL r32, eax
// * XCHGL eax, r32
// * XCHGL r32, r32
// * XCHGL m32, r32
// * XCHGL r32, m32
//
func (self *Program) XCHGL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XCHGL", 2, Operands { v0, v1 })
    // XCHGL r32, eax
    if isReg32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[0], false)
            m.emit(0x90 | lcode(v[0]))
        })
    }
    // XCHGL eax, r32
    if v0 == EAX && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x90 | lcode(v[1]))
        })
    }
    // XCHGL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XCHGL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x87)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XCHGL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x87)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XCHGL")
    }
    return p
}

// XCHGQ performs "Exchange Register/Memory with Register".
//
// Mnemonic : XCHG
// Supported forms : (5 forms)
//
// * XCHGQ r64, rax
// * XCHGQ rax, r64
// * XCHGQ r64, r64
// * XCHGQ m64, r64
// * XCHGQ r64, m64
//
func (self *Program) XCHGQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XCHGQ", 2, Operands { v0, v1 })
    // XCHGQ r64, rax
    if isReg64(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]))
            m.emit(0x90 | lcode(v[0]))
        })
    }
    // XCHGQ rax, r64
    if v0 == RAX && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x90 | lcode(v[1]))
        })
    }
    // XCHGQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XCHGQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x87)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XCHGQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x87)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XCHGQ")
    }
    return p
}

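// Sketch (illustrative, not generated): when one operand is memory, XCHG has
// implicit bus-lock semantics (no LOCK prefix needed), which makes it usable
// as an atomic swap. The Ptr memory-operand constructor below is an assumed
// helper name:
//
//     p.XCHGQ(RAX, Ptr(RDI, 0)) // atomically swap RAX with the qword at [RDI]
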
// XCHGW performs "Exchange Register/Memory with Register".
//
// Mnemonic : XCHG
// Supported forms : (5 forms)
//
// * XCHGW r16, ax
// * XCHGW ax, r16
// * XCHGW r16, r16
// * XCHGW m16, r16
// * XCHGW r16, m16
//
func (self *Program) XCHGW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XCHGW", 2, Operands { v0, v1 })
    // XCHGW r16, ax
    if isReg16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[0], false)
            m.emit(0x90 | lcode(v[0]))
        })
    }
    // XCHGW ax, r16
    if v0 == AX && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x90 | lcode(v[1]))
        })
    }
    // XCHGW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x87)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XCHGW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x87)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XCHGW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x87)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XCHGW")
    }
    return p
}

// XGETBV performs "Get Value of Extended Control Register".
//
// Mnemonic : XGETBV
// Supported forms : (1 form)
//
// * XGETBV
//
func (self *Program) XGETBV() *Instruction {
    p := self.alloc("XGETBV", 0, Operands { })
    // XGETBV
    p.domain = DomainGeneric
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x0f)
        m.emit(0x01)
        m.emit(0xd0)
    })
    return p
}

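// Sketch (illustrative, not generated): XGETBV with ECX=0 reads XCR0 into
// EDX:EAX; testing bits 1 and 2 (SSE and AVX state) is the usual check that
// the OS saves YMM state. ECX is assumed to be a register constant of this
// package, and ANDL with a leading immediate follows this file's imm-first
// operand convention:
//
//     p.XORL(ECX, ECX) // ECX = 0 selects XCR0
//     p.XGETBV()       // EDX:EAX = XCR0
//     p.ANDL(6, EAX)   // keep XCR0[2:1]; nonzero means XMM+YMM state enabled
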
// XLATB performs "Table Look-up Translation".
//
// Mnemonic : XLATB
// Supported forms : (2 forms)
//
// * XLATB
// * XLATB
//
func (self *Program) XLATB() *Instruction {
    p := self.alloc("XLATB", 0, Operands { })
    // XLATB
    p.domain = DomainMisc
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0xd7)
    })
    // XLATB
    p.domain = DomainMisc
    p.add(0, func(m *_Encoding, v []interface{}) {
        m.emit(0x48)
        m.emit(0xd7)
    })
    return p
}

// XORB performs "Logical Exclusive OR".
//
// Mnemonic : XOR
// Supported forms : (6 forms)
//
// * XORB imm8, al
// * XORB imm8, r8
// * XORB r8, r8
// * XORB m8, r8
// * XORB imm8, m8
// * XORB r8, m8
//
func (self *Program) XORB(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XORB", 2, Operands { v0, v1 })
    // XORB imm8, al
    if isImm8(v0) && v1 == AL {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x34)
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORB imm8, r8
    if isImm8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], isReg8REX(v[1]))
            m.emit(0x80)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORB r8, r8
    if isReg8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x30)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
            m.emit(0x32)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XORB m8, r8
    if isM8(v0) && isReg8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
            m.emit(0x32)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XORB imm8, m8
    if isImm8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x80)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORB r8, m8
    if isReg8(v0) && isM8(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
            m.emit(0x30)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XORB")
    }
    return p
}

// XORL performs "Logical Exclusive OR".
//
// Mnemonic : XOR
// Supported forms : (8 forms)
//
// * XORL imm32, eax
// * XORL imm8, r32
// * XORL imm32, r32
// * XORL r32, r32
// * XORL m32, r32
// * XORL imm8, m32
// * XORL imm32, m32
// * XORL r32, m32
//
func (self *Program) XORL(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XORL", 2, Operands { v0, v1 })
    // XORL imm32, eax
    if isImm32(v0) && v1 == EAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x35)
            m.imm4(toImmAny(v[0]))
        })
    }
    // XORL imm8, r32
    if isImm8Ext(v0, 4) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORL imm32, r32
    if isImm32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xf0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // XORL r32, r32
    if isReg32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XORL m32, r32
    if isM32(v0) && isReg32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XORL imm8, m32
    if isImm8Ext(v0, 4) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORL imm32, m32
    if isImm32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(6, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // XORL r32, m32
    if isReg32(v0) && isM32(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x31)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XORL")
    }
    return p
}

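// Sketch (illustrative, not generated): because 32-bit writes zero-extend to
// 64 bits, the short "XORL r32, r32" encoding is the standard zeroing idiom
// even for 64-bit registers, avoiding the REX.W prefix XORQ would add:
//
//     p.XORL(EAX, EAX) // RAX = 0, flags clobbered, no input dependency
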
// XORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values".
//
// Mnemonic : XORPD
// Supported forms : (2 forms)
//
// * XORPD xmm, xmm [SSE2]
// * XORPD m128, xmm [SSE2]
//
func (self *Program) XORPD(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XORPD", 2, Operands { v0, v1 })
    // XORPD xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XORPD m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE2)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x57)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XORPD")
    }
    return p
}

// XORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values".
//
// Mnemonic : XORPS
// Supported forms : (2 forms)
//
// * XORPS xmm, xmm [SSE]
// * XORPS m128, xmm [SSE]
//
func (self *Program) XORPS(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XORPS", 2, Operands { v0, v1 })
    // XORPS xmm, xmm
    if isXMM(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x0f)
            m.emit(0x57)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XORPS m128, xmm
    if isM128(v0) && isXMM(v1) {
        self.require(ISA_SSE)
        p.domain = DomainMMXSSE
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x0f)
            m.emit(0x57)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XORPS")
    }
    return p
}

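// Sketch (illustrative, not generated): besides zeroing a register
// (XORPS xmm, xmm), XOR-ing against a sign-bit mask negates packed floats.
// The mask operand below is hypothetical; it would be a 16-byte-aligned m128
// holding 0x80000000 in each lane, built with this package's memory-operand
// helper:
//
//     p.XORPS(XMM0, XMM0)           // zero XMM0
//     // p.XORPS(signMask128, XMM1) // hypothetical mask: flips the sign of 4 floats
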
// XORQ performs "Logical Exclusive OR".
//
// Mnemonic : XOR
// Supported forms : (8 forms)
//
// * XORQ imm32, rax
// * XORQ imm8, r64
// * XORQ imm32, r64
// * XORQ r64, r64
// * XORQ m64, r64
// * XORQ imm8, m64
// * XORQ imm32, m64
// * XORQ r64, m64
//
func (self *Program) XORQ(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XORQ", 2, Operands { v0, v1 })
    // XORQ imm32, rax
    if isImm32(v0) && v1 == RAX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48)
            m.emit(0x35)
            m.imm4(toImmAny(v[0]))
        })
    }
    // XORQ imm8, r64
    if isImm8Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x83)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORQ imm32, r64
    if isImm32Ext(v0, 8) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]))
            m.emit(0x81)
            m.emit(0xf0 | lcode(v[1]))
            m.imm4(toImmAny(v[0]))
        })
    }
    // XORQ r64, r64
    if isReg64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XORQ m64, r64
    if isM64(v0) && isReg64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[1]), addr(v[0]))
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XORQ imm8, m64
    if isImm8Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x83)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORQ imm32, m64
    if isImm32Ext(v0, 8) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, 0, addr(v[1]))
            m.emit(0x81)
            m.mrsd(6, addr(v[1]), 1)
            m.imm4(toImmAny(v[0]))
        })
    }
    // XORQ r64, m64
    if isReg64(v0) && isM64(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.rexm(1, hcode(v[0]), addr(v[1]))
            m.emit(0x31)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XORQ")
    }
    return p
}

// XORW performs "Logical Exclusive OR".
//
// Mnemonic : XOR
// Supported forms : (8 forms)
//
// * XORW imm16, ax
// * XORW imm8, r16
// * XORW imm16, r16
// * XORW r16, r16
// * XORW m16, r16
// * XORW imm8, m16
// * XORW imm16, m16
// * XORW r16, m16
//
func (self *Program) XORW(v0 interface{}, v1 interface{}) *Instruction {
    p := self.alloc("XORW", 2, Operands { v0, v1 })
    // XORW imm16, ax
    if isImm16(v0) && v1 == AX {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.emit(0x35)
            m.imm2(toImmAny(v[0]))
        })
    }
    // XORW imm8, r16
    if isImm8Ext(v0, 2) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x83)
            m.emit(0xf0 | lcode(v[1]))
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORW imm16, r16
    if isImm16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, v[1], false)
            m.emit(0x81)
            m.emit(0xf0 | lcode(v[1]))
            m.imm2(toImmAny(v[0]))
        })
    }
    // XORW r16, r16
    if isReg16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), v[1], false)
            m.emit(0x31)
            m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
        })
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), v[0], false)
            m.emit(0x33)
            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
        })
    }
    // XORW m16, r16
    if isM16(v0) && isReg16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[1]), addr(v[0]), false)
            m.emit(0x33)
            m.mrsd(lcode(v[1]), addr(v[0]), 1)
        })
    }
    // XORW imm8, m16
    if isImm8Ext(v0, 2) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x83)
            m.mrsd(6, addr(v[1]), 1)
            m.imm1(toImmAny(v[0]))
        })
    }
    // XORW imm16, m16
    if isImm16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(0, addr(v[1]), false)
            m.emit(0x81)
            m.mrsd(6, addr(v[1]), 1)
            m.imm2(toImmAny(v[0]))
        })
    }
    // XORW r16, m16
    if isReg16(v0) && isM16(v1) {
        p.domain = DomainGeneric
        p.add(0, func(m *_Encoding, v []interface{}) {
            m.emit(0x66)
            m.rexo(hcode(v[0]), addr(v[1]), false)
            m.emit(0x31)
            m.mrsd(lcode(v[0]), addr(v[1]), 1)
        })
    }
    if p.len == 0 {
        panic("invalid operands for XORW")
    }
    return p
}