LLVM turned this prefix check
if instruction.prefixes.rex_unchecked().present() ||
instruction.prefixes.lock() ||
instruction.prefixes.operand_size() ||
instruction.prefixes.rep() ||
instruction.prefixes.repnz() {
return Err(DecodeError::InvalidPrefixes);
}
where the relevant part of prefixes is this thing:
pub struct Prefixes {
bits: u8,
rex: PrefixRex, // a struct containing one u8, no repr funniness
into this monstrosity
│ Disassembly of section .text:
│
│ 0000000000007c90 <yaxpeax_x86::long_mode::read_avx_prefixed>:
│ _ZN11yaxpeax_x869long_mode17read_avx_prefixed17h6b8a56c059f7cfe0E():
4.35 │ push %rbx
│ sub $0x10,%rsp
│ movq 0x20(%rdx),%xmm0
30.57 │ pxor %xmm1,%xmm1
│ punpcklbw %xmm1,%xmm0
11.97 │ pshuflw $0x4,%xmm0,%xmm0
1.05 │ packuswb %xmm0,%xmm0
7.57 │ pand _fini+0xe7c,%xmm0
│ pcmpeqd %xmm1,%xmm1
│ movdqa _fini+0xe8c,%xmm2
│ pcmpeqb %xmm0,%xmm2
│ pxor %xmm2,%xmm1
3.24 │ punpcklbw %xmm1,%xmm1
7.65 │ punpcklwd %xmm1,%xmm1
5.42 │ psrad $0x18,%xmm1
2.19 │ punpcklbw %xmm2,%xmm2
│ pshufd $0x60,%xmm2,%xmm2
│ psrad $0x18,%xmm2
│ pshufd $0xed,%xmm2,%xmm2
│ pshufd $0xe8,%xmm1,%xmm1
│ punpckldq %xmm2,%xmm1
6.49 │ movdqa %xmm0,(%rsp)
│ pslld $0x1f,%xmm1
4.35 │ movmskps %xmm1,%eax
2.17 │ mov $0x3,%bl
│ test %eax,%eax
│ ↓ jne 19f
So I've reached in and done the check by hand:
/// Returns `true` if any prefix that is rejected alongside a VEX prefix is present.
///
/// Hand-written equivalent of the chained accessor check quoted in the block
/// comment below: one mask test on `self.bits` plus a non-zero test on the REX byte.
#[inline]
fn vex_invalid(&self) -> bool {
/*
 * The accessor-based check this method replaces:
 *
 * if instruction.prefixes.rex_unchecked().present()
 * || instruction.prefixes.lock()
 * || instruction.prefixes.operand_size()
 * || instruction.prefixes.rep()
 * || instruction.prefixes.repnz() {
 */
// and i'm pretty sure i even got the bit testing correct this time:
// * any of the relevant prefix bits are set, OR
// * any bit set in the rex prefix section
// NOTE(review): 0b1100_1001 is assumed to cover exactly the lock, operand_size,
// rep and repnz bits of `bits` -- confirm against those accessors.
(self.bits & 0b1100_1001) > 0 || (self.rex.bits > 0)
}
...
if instruction.prefixes.vex_invalid() {
return Err(DecodeError::InvalidPrefixes);
}
which produces this much less problematic (if also very Wild) code:
│ Disassembly of section .text:
│
│ 0000000000007c90 <yaxpeax_x86::long_mode::read_avx_prefixed>:
│ _ZN11yaxpeax_x869long_mode17read_avx_prefixed17h6b8a56c059f7cfe0E():
21.21 │ push %rbx
2.14 │ movzbl 0x20(%rdx),%eax
│ and $0xc9,%al ; here is
│ mov $0x3,%bl ; the entire
│ or 0x21(%rdx),%al ; prefix check
4.28 │ ↓ je 12
│ e: mov %ebx,%eax
│ pop %rbx
│ ← retq
.... but because this code is only about 0.1% of total runtime, this is really an aesthetic improvement more than a performance one.