VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac@ 95638

Last change on this file since 95638 was 95577, checked in by vboxsync, 3 years ago

ValKit/bs3-cpu-instr-3: Simple [v]ptest test. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.8 KB
Line 
1; $Id: bs3-cpu-instr-3-template.mac 95577 2022-07-08 23:18:23Z vboxsync $
2;; @file
3; BS3Kit - bs3-cpu-instr-3 - MMX, SSE and AVX instructions, assembly template.
4;
5
6;
7; Copyright (C) 2007-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28;*********************************************************************************************************************************
29;* Header Files *
30;*********************************************************************************************************************************
31%include "bs3kit-template-header.mac" ; setup environment
32
33
34;*********************************************************************************************************************************
35;* External Symbols *
36;*********************************************************************************************************************************
37TMPL_BEGIN_TEXT
38
39
40;
41; Test code snippets containing code which differs between 16-bit, 32-bit
42; and 64-bit CPU modes.
43;
44%ifdef BS3_INSTANTIATING_CMN
45
46
;;
; Opens a near common-mode procedure (BS3_PROC_BEGIN_CMN with BS3_PBC_NEAR)
; that is preceded by a one byte instruction length.
;
; The length byte is the distance from the start of the procedure to its
; .again label, so the procedure must define .again right after the code
; whose length is to be recorded.  The gap up to the 8 byte boundary in
; front of the length byte is filled with 0cch bytes.
;
; @param 1      The procedure name, passed on unmodified to BS3_PROC_BEGIN_CMN.
;
 %ifndef BS3CPUINSTR3_PROC_BEGIN_CMN_DEFINED
  %define BS3CPUINSTR3_PROC_BEGIN_CMN_DEFINED
  %macro BS3CPUINSTR3_PROC_BEGIN_CMN 1
        align   8, db 0cch
        db      BS3_CMN_NM(%1).again - BS3_CMN_NM(%1)   ; the length prefix byte
BS3_PROC_BEGIN_CMN %1, BS3_PBC_NEAR
  %endmacro
 %endif
61
;;
; The EMIT_INSTR_PLUS_ICEBP macros create a common function that is named
; after a single instruction and its operands, and that consists of that
; instruction followed by a looping ICEBP.
;
; It works like a prefix to the instruction invocation.  The only exception is
; that a [fs:xBX] memory operand is written as FSxBX, since that is what is
; wanted in the function name; it is only turned into [fs:xBX] while the
; instruction itself is being emitted.
;
; The macro is overloaded on the operand count (one to four operands):
; @param 1      The instruction mnemonic.
; @param 2..5   The operands, in instruction order.
;
 %ifndef EMIT_INSTR_PLUS_ICEBP_DEFINED
  %define EMIT_INSTR_PLUS_ICEBP_DEFINED

  ;; Private helper: the .again label and the ICEBP loop shared by all variants.
  %macro BS3CPUINSTR3_ICEBP_LOOP 0
.again:
        icebp
        jmp     .again
  %endmacro

  ;; One operand.
  %macro EMIT_INSTR_PLUS_ICEBP 2
BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _icebp
   %define FSxBX [fs:xBX]
        %1      %2
   %undef FSxBX
        BS3CPUINSTR3_ICEBP_LOOP
BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _icebp
  %endmacro

  ;; Two operands.
  %macro EMIT_INSTR_PLUS_ICEBP 3
BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _icebp
   %define FSxBX [fs:xBX]
        %1      %2, %3
   %undef FSxBX
        BS3CPUINSTR3_ICEBP_LOOP
BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _icebp
  %endmacro

  ;; Three operands.
  %macro EMIT_INSTR_PLUS_ICEBP 4
BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _icebp
   %define FSxBX [fs:xBX]
        %1      %2, %3, %4
   %undef FSxBX
        BS3CPUINSTR3_ICEBP_LOOP
BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _icebp
  %endmacro

  ;; Four operands.
  %macro EMIT_INSTR_PLUS_ICEBP 5
BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _ %+ %5 %+ _icebp
   %define FSxBX [fs:xBX]
        %1      %2, %3, %4, %5
   %undef FSxBX
        BS3CPUINSTR3_ICEBP_LOOP
BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _ %+ %5 %+ _icebp
  %endmacro

 %endif
117
;;
; Companion to EMIT_INSTR_PLUS_ICEBP for dealing with encodings that the
; assembler does not want to emit: the instruction is given as raw bytes.
;
; @param 1      The function name (omitting bs3CpuInstr3_ and _icebp).
; @param 2+     The opcode bytes.  FSxBX_PFX and FSxBX_MODRM are defined
;               locally and may be used in the byte list: FSxBX_PFX is 64h,
;               and FSxBX_MODRM is 07h when TMPL_BITS is 16 and 03h otherwise.
;
 %ifndef EMIT_INSTR_PLUS_ICEBP_BYTES_DEFINED
  %define EMIT_INSTR_PLUS_ICEBP_BYTES_DEFINED

  %macro EMIT_INSTR_PLUS_ICEBP_BYTES 2+
BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _icebp
   %define FSxBX_PFX    64h
   %if TMPL_BITS != 16
    %define FSxBX_MODRM 03h
   %else
    %define FSxBX_MODRM 07h
   %endif
        db      %2
   %undef FSxBX_MODRM
   %undef FSxBX_PFX
.again:
        icebp
        jmp     .again
BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _icebp
  %endmacro
 %endif
145
146
147
%ifndef EMIT_TYPE1_INSTR_DEFINED
 %define EMIT_TYPE1_INSTR_DEFINED

 ;;
 ; Emits the test functions for a single MMX/SSE instruction and its AVX
 ; counterpart: register and [fs:xBX] source forms for MMX (optional), SSE,
 ; AVX-128 and AVX-256, plus one high register AVX-256 form in 64-bit mode.
 ;
 ; @param 1     The MMX/SSE instruction name.
 ; @param 2     The AVX instruction name.
 ; @param 3     Non-zero if the MMX variants should be emitted.
 %macro EMIT_TYPE1_ONE_INSTR 3
  %if %3 != 0
        EMIT_INSTR_PLUS_ICEBP %1, MM1,   MM2
        EMIT_INSTR_PLUS_ICEBP %1, MM1,   FSxBX
  %endif
        EMIT_INSTR_PLUS_ICEBP %1, XMM1,  XMM2
        EMIT_INSTR_PLUS_ICEBP %1, XMM1,  FSxBX
        EMIT_INSTR_PLUS_ICEBP %2, XMM2,  XMM1, XMM0
        EMIT_INSTR_PLUS_ICEBP %2, XMM2,  XMM1, FSxBX
        EMIT_INSTR_PLUS_ICEBP %2, YMM2,  YMM1, YMM0
        EMIT_INSTR_PLUS_ICEBP %2, YMM2,  YMM1, FSxBX
  %if TMPL_BITS == 64
        EMIT_INSTR_PLUS_ICEBP %2, YMM10, YMM8, YMM15
  %endif
 %endmacro ; EMIT_TYPE1_ONE_INSTR

 ;;
 ; Emits the test functions for three MMX/SSE + AVX instruction pairs.
 ;
 ; @param 1     1st pair: The MMX/SSE instruction name (MMX forms are always emitted).
 ; @param 2     1st pair: The AVX instruction name.
 ; @param 3     2nd pair: The MMX/SSE instruction name.
 ; @param 4     2nd pair: The AVX instruction name.
 ; @param 5     3rd pair: The MMX/SSE instruction name.
 ; @param 6     3rd pair: The AVX instruction name.
 ; @param 7     Indicates whether the 2nd and 3rd pair have MMX variants.
 %macro EMIT_TYPE1_INSTR 7
        ;
        ; 1st pair - the MMX forms are unconditional here.
        ;
        EMIT_INSTR_PLUS_ICEBP %1, MM1,  MM2
        EMIT_INSTR_PLUS_ICEBP %1, MM1,  FSxBX
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, XMM2
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, FSxBX
        EMIT_INSTR_PLUS_ICEBP %2, XMM1, XMM1, XMM2
        EMIT_INSTR_PLUS_ICEBP %2, XMM1, XMM1, FSxBX
        EMIT_INSTR_PLUS_ICEBP %2, YMM7, YMM2, YMM3
        EMIT_INSTR_PLUS_ICEBP %2, YMM7, YMM2, FSxBX

        ;
        ; 2nd pair.
        ;
  %if %7 != 0
        EMIT_INSTR_PLUS_ICEBP %3, MM1,  MM2
        EMIT_INSTR_PLUS_ICEBP %3, MM1,  FSxBX
  %endif
        EMIT_INSTR_PLUS_ICEBP %3, XMM1, XMM2
        EMIT_INSTR_PLUS_ICEBP %3, XMM1, FSxBX
        EMIT_INSTR_PLUS_ICEBP %4, XMM1, XMM1, XMM2
        EMIT_INSTR_PLUS_ICEBP %4, XMM1, XMM1, FSxBX
        EMIT_INSTR_PLUS_ICEBP %4, YMM1, YMM1, YMM2
        EMIT_INSTR_PLUS_ICEBP %4, YMM1, YMM1, FSxBX

        ;
        ; 3rd pair - exactly the set EMIT_TYPE1_ONE_INSTR produces.
        ;
        EMIT_TYPE1_ONE_INSTR %5, %6, %7
 %endmacro ; EMIT_TYPE1_INSTR

%endif
417
; Logical operations (no MMX variants for the packed single/double forms).
EMIT_TYPE1_INSTR     pand,    vpand,    andps,   vandps,   andpd,   vandpd,   0
EMIT_TYPE1_INSTR     pandn,   vpandn,   andnps,  vandnps,  andnpd,  vandnpd,  0
EMIT_TYPE1_INSTR     por,     vpor,     orps,    vorps,    orpd,    vorpd,    0
EMIT_TYPE1_INSTR     pxor,    vpxor,    xorps,   vxorps,   xorpd,   vxorpd,   0

; Compares; the quadword forms are emitted without MMX variants.
EMIT_TYPE1_INSTR     pcmpgtb, vpcmpgtb, pcmpgtw, vpcmpgtw, pcmpgtd, vpcmpgtd, 1
EMIT_TYPE1_ONE_INSTR pcmpgtq, vpcmpgtq, 0
EMIT_TYPE1_INSTR     pcmpeqb, vpcmpeqb, pcmpeqw, vpcmpeqw, pcmpeqd, vpcmpeqd, 1
EMIT_TYPE1_ONE_INSTR pcmpeqq, vpcmpeqq, 0

; Additions.
EMIT_TYPE1_INSTR     paddb,   vpaddb,   paddw,   vpaddw,   paddd,   vpaddd,   1
EMIT_TYPE1_ONE_INSTR paddq,   vpaddq,   1

; Subtractions.
EMIT_TYPE1_INSTR     psubb,   vpsubb,   psubw,   vpsubw,   psubd,   vpsubd,   1
EMIT_TYPE1_ONE_INSTR psubq,   vpsubq,   1
433
434
;
; Type 2 instructions.  On the form:  pxxxx sAX, [zy]mm0
;
%ifndef EMIT_TYPE2_ONE_INSTR_DEFINED
 %define EMIT_TYPE2_ONE_INSTR_DEFINED
 ;;
 ; Emits the register source forms via the assembler and the [fs:xBX] source
 ; forms as raw bytes (using the FS prefix and ModR/M byte supplied by
 ; EMIT_INSTR_PLUS_ICEBP_BYTES).
 ;
 ; @param 1     MMX/SSE instruction name
 ; @param 2     AVX instruction name
 ; @param 3     Whether to emit MMX function
 ; @param 4     The opcode byte.  (assuming two byte / vex map 1)
 ;
 ; NOTE(review): The hand-coded VEX prefixes below are 0c4h, 0e0h, 071h/075h.
 ;               As far as I can decode them, 0e0h carries mmmmm=0 rather than
 ;               map 1 (which would be 0e1h) and 071h/075h carry vvvv=1110b.
 ;               The bytes are kept exactly as they were - please confirm that
 ;               this is what the test driver expects.
 %macro EMIT_TYPE2_ONE_INSTR 4
  %if %3 != 0
        EMIT_INSTR_PLUS_ICEBP       %1, EAX, MM2
        ; %1 eax, qword [fs:xBX]
        EMIT_INSTR_PLUS_ICEBP_BYTES %1 %+ _EAX_qword_FSxBX,  FSxBX_PFX, 0fh, %4, FSxBX_MODRM
  %endif

        EMIT_INSTR_PLUS_ICEBP       %1, EAX, XMM2
        ; %1 eax, dqword [fs:xBX]
        EMIT_INSTR_PLUS_ICEBP_BYTES %1 %+ _EAX_dqword_FSxBX, FSxBX_PFX, 66h, 0fh, %4, FSxBX_MODRM

        EMIT_INSTR_PLUS_ICEBP       %2, EAX, XMM2
        ; %2 eax, dqword [fs:xBX]
        EMIT_INSTR_PLUS_ICEBP_BYTES %2 %+ _EAX_dqword_FSxBX, FSxBX_PFX, 0c4h, 0e0h, 071h, %4, FSxBX_MODRM

        EMIT_INSTR_PLUS_ICEBP       %2, EAX, YMM2
        ; %2 eax, qqword [fs:xBX]
        EMIT_INSTR_PLUS_ICEBP_BYTES %2 %+ _EAX_qqword_FSxBX, FSxBX_PFX, 0c4h, 0e0h, 075h, %4, FSxBX_MODRM

  %if TMPL_BITS == 64
        EMIT_INSTR_PLUS_ICEBP       %2, RAX, YMM9
  %endif
 %endmacro ; EMIT_TYPE2_ONE_INSTR
%endif

EMIT_TYPE2_ONE_INSTR pmovmskb, vpmovmskb, 1, 0d7h
531
;
; Local helpers for the shuffle, unpack and pack sections below.  All of these
; sections emit the same operand combinations, only the mnemonics differ.
;
%ifndef BS3CPUINSTR3_EMIT_SETS_DEFINED
 %define BS3CPUINSTR3_EMIT_SETS_DEFINED

 ;;
 ; Two-operand MMX/SSE instruction plus its three-operand AVX counterpart,
 ; each with a register and a [fs:xBX] source, and with high register
 ; variants when assembling for 64-bit mode.
 ;
 ; @param 1     The MMX/SSE instruction name.
 ; @param 2     The AVX instruction name.
 ; @param 3     Non-zero if the MMX forms should be emitted too.
 %macro BS3CPUINSTR3_EMIT_BINARY_SET 3
  %if %3 != 0
        EMIT_INSTR_PLUS_ICEBP %1, MM1,  MM2
        EMIT_INSTR_PLUS_ICEBP %1, MM1,  FSxBX
  %endif

        EMIT_INSTR_PLUS_ICEBP %1, XMM1, XMM2
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, FSxBX
  %if TMPL_BITS == 64
        EMIT_INSTR_PLUS_ICEBP %1, XMM8, XMM9
        EMIT_INSTR_PLUS_ICEBP %1, XMM8, FSxBX
  %endif

        EMIT_INSTR_PLUS_ICEBP %2, XMM1, XMM2, XMM3
        EMIT_INSTR_PLUS_ICEBP %2, XMM1, XMM2, FSxBX
  %if TMPL_BITS == 64
        EMIT_INSTR_PLUS_ICEBP %2, XMM8, XMM9, XMM10
        EMIT_INSTR_PLUS_ICEBP %2, XMM8, XMM9, FSxBX
  %endif

        EMIT_INSTR_PLUS_ICEBP %2, YMM1, YMM2, YMM3
        EMIT_INSTR_PLUS_ICEBP %2, YMM1, YMM2, FSxBX
  %if TMPL_BITS == 64
        EMIT_INSTR_PLUS_ICEBP %2, YMM8, YMM9, YMM10
        EMIT_INSTR_PLUS_ICEBP %2, YMM8, YMM9, FSxBX
  %endif
 %endmacro ; BS3CPUINSTR3_EMIT_BINARY_SET

 ;;
 ; SSE shuffle instruction with an immediate byte plus its AVX counterpart,
 ; each with the immediates 0FFh and 01Bh and with a register and a [fs:xBX]
 ; source.  (No MMX forms.)
 ;
 ; @param 1     The SSE instruction name.
 ; @param 2     The AVX instruction name.
 %macro BS3CPUINSTR3_EMIT_SHUFFLE_IMM8_SET 2
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, XMM2,  0FFh
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, FSxBX, 0FFh
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, XMM2,  01Bh
        EMIT_INSTR_PLUS_ICEBP %1, XMM1, FSxBX, 01Bh

        EMIT_INSTR_PLUS_ICEBP %2, XMM1, XMM2,  0FFh
        EMIT_INSTR_PLUS_ICEBP %2, XMM1, FSxBX, 0FFh
        EMIT_INSTR_PLUS_ICEBP %2, XMM1, XMM2,  01Bh
        EMIT_INSTR_PLUS_ICEBP %2, XMM1, FSxBX, 01Bh

        EMIT_INSTR_PLUS_ICEBP %2, YMM1, YMM2,  0FFh
        EMIT_INSTR_PLUS_ICEBP %2, YMM1, FSxBX, 0FFh
        EMIT_INSTR_PLUS_ICEBP %2, YMM1, YMM2,  01Bh
        EMIT_INSTR_PLUS_ICEBP %2, YMM1, FSxBX, 01Bh

  %if TMPL_BITS == 64
        EMIT_INSTR_PLUS_ICEBP %2, YMM12, YMM7, 0FFh
        EMIT_INSTR_PLUS_ICEBP %2, YMM9, YMM12, 01Bh
  %endif
 %endmacro ; BS3CPUINSTR3_EMIT_SHUFFLE_IMM8_SET
%endif

;
; [V]PSHUFB
;
BS3CPUINSTR3_EMIT_BINARY_SET pshufb, vpshufb, 1

;
; PSHUFW
;
EMIT_INSTR_PLUS_ICEBP pshufw, MM1, MM2,   0FFh  ; FF = top src word in all destination words
EMIT_INSTR_PLUS_ICEBP pshufw, MM1, FSxBX, 0FFh
EMIT_INSTR_PLUS_ICEBP pshufw, MM1, MM2,   01Bh  ; 1B = word swap (like bswap but for words)
EMIT_INSTR_PLUS_ICEBP pshufw, MM1, FSxBX, 01Bh

;
; [V]PSHUFHW, [V]PSHUFLW and [V]PSHUFD
;
BS3CPUINSTR3_EMIT_SHUFFLE_IMM8_SET pshufhw, vpshufhw
BS3CPUINSTR3_EMIT_SHUFFLE_IMM8_SET pshuflw, vpshuflw
BS3CPUINSTR3_EMIT_SHUFFLE_IMM8_SET pshufd,  vpshufd

;
; [V]PUNPCKHBW, [V]PUNPCKHWD, [V]PUNPCKHDQ and [V]PUNPCKHQDQ (the latter w/o MMX)
;
BS3CPUINSTR3_EMIT_BINARY_SET punpckhbw,  vpunpckhbw,  1
BS3CPUINSTR3_EMIT_BINARY_SET punpckhwd,  vpunpckhwd,  1
BS3CPUINSTR3_EMIT_BINARY_SET punpckhdq,  vpunpckhdq,  1
BS3CPUINSTR3_EMIT_BINARY_SET punpckhqdq, vpunpckhqdq, 0

;
; [V]PUNPCKLBW, [V]PUNPCKLWD, [V]PUNPCKLDQ and [V]PUNPCKLQDQ (the latter w/o MMX)
;
BS3CPUINSTR3_EMIT_BINARY_SET punpcklbw,  vpunpcklbw,  1
BS3CPUINSTR3_EMIT_BINARY_SET punpcklwd,  vpunpcklwd,  1
BS3CPUINSTR3_EMIT_BINARY_SET punpckldq,  vpunpckldq,  1
BS3CPUINSTR3_EMIT_BINARY_SET punpcklqdq, vpunpcklqdq, 0

;
; [V]PACKSSWB, [V]PACKSSDW, [V]PACKUSWB and [V]PACKUSDW (the latter w/o MMX)
;
BS3CPUINSTR3_EMIT_BINARY_SET packsswb, vpacksswb, 1
BS3CPUINSTR3_EMIT_BINARY_SET packssdw, vpackssdw, 1
BS3CPUINSTR3_EMIT_BINARY_SET packuswb, vpackuswb, 1
BS3CPUINSTR3_EMIT_BINARY_SET packusdw, vpackusdw, 0
950
;
; [V]MOVNTDQA - destination register, [fs:xBX] source.
;
EMIT_INSTR_PLUS_ICEBP movntdqa,  XMM1,  FSxBX
EMIT_INSTR_PLUS_ICEBP vmovntdqa, XMM1,  FSxBX
EMIT_INSTR_PLUS_ICEBP vmovntdqa, YMM1,  FSxBX
 %if TMPL_BITS == 64
EMIT_INSTR_PLUS_ICEBP movntdqa,  XMM10, FSxBX
EMIT_INSTR_PLUS_ICEBP vmovntdqa, XMM11, FSxBX
EMIT_INSTR_PLUS_ICEBP vmovntdqa, YMM12, FSxBX
 %endif

;
; [V]MOVNTDQ - [fs:xBX] destination, source register.
;
EMIT_INSTR_PLUS_ICEBP movntdq,   FSxBX, XMM1
EMIT_INSTR_PLUS_ICEBP vmovntdq,  FSxBX, XMM1
EMIT_INSTR_PLUS_ICEBP vmovntdq,  FSxBX, YMM1
 %if TMPL_BITS == 64
EMIT_INSTR_PLUS_ICEBP movntdq,   FSxBX, XMM10
EMIT_INSTR_PLUS_ICEBP vmovntdq,  FSxBX, XMM10
EMIT_INSTR_PLUS_ICEBP vmovntdq,  FSxBX, YMM10
 %endif


;
; [V]MOVNTPS - [fs:xBX] destination, source register.
;
EMIT_INSTR_PLUS_ICEBP movntps,   FSxBX, XMM1
EMIT_INSTR_PLUS_ICEBP vmovntps,  FSxBX, XMM1
EMIT_INSTR_PLUS_ICEBP vmovntps,  FSxBX, YMM1
 %if TMPL_BITS == 64
EMIT_INSTR_PLUS_ICEBP movntps,   FSxBX, XMM10
EMIT_INSTR_PLUS_ICEBP vmovntps,  FSxBX, XMM11
EMIT_INSTR_PLUS_ICEBP vmovntps,  FSxBX, YMM12
 %endif

;
; [V]MOVNTPD - [fs:xBX] destination, source register.
;
EMIT_INSTR_PLUS_ICEBP movntpd,   FSxBX, XMM1
EMIT_INSTR_PLUS_ICEBP vmovntpd,  FSxBX, XMM1
EMIT_INSTR_PLUS_ICEBP vmovntpd,  FSxBX, YMM1
 %if TMPL_BITS == 64
EMIT_INSTR_PLUS_ICEBP movntpd,   FSxBX, XMM10
EMIT_INSTR_PLUS_ICEBP vmovntpd,  FSxBX, XMM11
EMIT_INSTR_PLUS_ICEBP vmovntpd,  FSxBX, YMM12
 %endif
999
1000;
1001; [V]MOVUPS - not testing the 2nd register variant.
;
; Each of the legacy, VEX.128 and VEX.256 forms gets a reg,reg, a load
; (reg, FSxBX) and a store (FSxBX, reg) variant.
; NOTE(review): "2nd register variant" presumably means the store-opcode
;               (011h) encoding of the reg,reg form - TODO confirm.
1002;
1003EMIT_INSTR_PLUS_ICEBP movups, XMM1, XMM2
1004EMIT_INSTR_PLUS_ICEBP movups, XMM1, FSxBX
1005EMIT_INSTR_PLUS_ICEBP movups, FSxBX, XMM1
1006EMIT_INSTR_PLUS_ICEBP vmovups, XMM1, XMM2
1007EMIT_INSTR_PLUS_ICEBP vmovups, XMM1, FSxBX
1008EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, XMM1
1009EMIT_INSTR_PLUS_ICEBP vmovups, YMM1, YMM2
1010EMIT_INSTR_PLUS_ICEBP vmovups, YMM1, FSxBX
1011EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, YMM1
1012 %if TMPL_BITS == 64
1013EMIT_INSTR_PLUS_ICEBP movups, XMM8, XMM12
1014EMIT_INSTR_PLUS_ICEBP movups, XMM10, FSxBX
1015EMIT_INSTR_PLUS_ICEBP movups, FSxBX, XMM10
1016EMIT_INSTR_PLUS_ICEBP vmovups, XMM7, XMM14
1017EMIT_INSTR_PLUS_ICEBP vmovups, XMM11, FSxBX
1018EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, XMM11
1019EMIT_INSTR_PLUS_ICEBP vmovups, YMM12, YMM8
1020EMIT_INSTR_PLUS_ICEBP vmovups, YMM12, FSxBX
1021EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, YMM12
1022 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1023
1024;
1025; [V]MOVUPD - not testing the 2nd register variant.
;
; Each of the legacy, VEX.128 and VEX.256 forms gets a reg,reg, a load
; (reg, FSxBX) and a store (FSxBX, reg) variant.
; NOTE(review): "2nd register variant" presumably means the store-opcode
;               (011h) encoding of the reg,reg form - TODO confirm.
1026;
1027EMIT_INSTR_PLUS_ICEBP movupd, XMM1, XMM2
1028EMIT_INSTR_PLUS_ICEBP movupd, XMM1, FSxBX
1029EMIT_INSTR_PLUS_ICEBP movupd, FSxBX, XMM1
1030EMIT_INSTR_PLUS_ICEBP vmovupd, XMM1, XMM2
1031EMIT_INSTR_PLUS_ICEBP vmovupd, XMM1, FSxBX
1032EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, XMM1
1033EMIT_INSTR_PLUS_ICEBP vmovupd, YMM1, YMM2
1034EMIT_INSTR_PLUS_ICEBP vmovupd, YMM1, FSxBX
1035EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, YMM1
1036 %if TMPL_BITS == 64
1037EMIT_INSTR_PLUS_ICEBP movupd, XMM8, XMM12
1038EMIT_INSTR_PLUS_ICEBP movupd, XMM10, FSxBX
1039EMIT_INSTR_PLUS_ICEBP movupd, FSxBX, XMM10
1040EMIT_INSTR_PLUS_ICEBP vmovupd, XMM7, XMM14
1041EMIT_INSTR_PLUS_ICEBP vmovupd, XMM11, FSxBX
1042EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, XMM11
1043EMIT_INSTR_PLUS_ICEBP vmovupd, YMM12, YMM8
1044EMIT_INSTR_PLUS_ICEBP vmovupd, YMM12, FSxBX
1045EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, YMM12
1046 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1047
1048;
1049; [V]MOVSS - not testing the 2nd register variant.
;
; Legacy and VEX forms each get a reg,reg, a load (reg, FSxBX) and a store
; (FSxBX, reg) variant; XMM registers only.
; NOTE(review): "2nd register variant" presumably means the store-opcode
;               (011h) encoding of the reg,reg form - TODO confirm.
1050;
1051EMIT_INSTR_PLUS_ICEBP movss, XMM1, XMM2
1052EMIT_INSTR_PLUS_ICEBP movss, XMM1, FSxBX
1053EMIT_INSTR_PLUS_ICEBP movss, FSxBX, XMM1
1054EMIT_INSTR_PLUS_ICEBP vmovss, XMM1, XMM2
1055EMIT_INSTR_PLUS_ICEBP vmovss, XMM1, FSxBX
1056EMIT_INSTR_PLUS_ICEBP vmovss, FSxBX, XMM1
1057 %if TMPL_BITS == 64
1058EMIT_INSTR_PLUS_ICEBP movss, XMM11, XMM8
1059EMIT_INSTR_PLUS_ICEBP movss, XMM8, FSxBX
1060EMIT_INSTR_PLUS_ICEBP movss, FSxBX, XMM11
1061EMIT_INSTR_PLUS_ICEBP vmovss, XMM9, XMM10
1062EMIT_INSTR_PLUS_ICEBP vmovss, XMM10, FSxBX
1063EMIT_INSTR_PLUS_ICEBP vmovss, FSxBX, XMM9
1064 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1065
1066;
1067; [V]MOVSD - not testing the 2nd register variant.
;
; Legacy and VEX forms each get a reg,reg, a load (reg, FSxBX) and a store
; (FSxBX, reg) variant; XMM registers only.
; NOTE(review): "2nd register variant" presumably means the store-opcode
;               (011h) encoding of the reg,reg form - TODO confirm.
1068;
1069EMIT_INSTR_PLUS_ICEBP movsd, XMM1, XMM2
1070EMIT_INSTR_PLUS_ICEBP movsd, XMM1, FSxBX
1071EMIT_INSTR_PLUS_ICEBP movsd, FSxBX, XMM1
1072EMIT_INSTR_PLUS_ICEBP vmovsd, XMM1, XMM2
1073EMIT_INSTR_PLUS_ICEBP vmovsd, XMM1, FSxBX
1074EMIT_INSTR_PLUS_ICEBP vmovsd, FSxBX, XMM1
1075 %if TMPL_BITS == 64
1076EMIT_INSTR_PLUS_ICEBP movsd, XMM11, XMM8
1077EMIT_INSTR_PLUS_ICEBP movsd, XMM8, FSxBX
1078EMIT_INSTR_PLUS_ICEBP movsd, FSxBX, XMM11
1079EMIT_INSTR_PLUS_ICEBP vmovsd, XMM9, XMM10
1080EMIT_INSTR_PLUS_ICEBP vmovsd, XMM10, FSxBX
1081EMIT_INSTR_PLUS_ICEBP vmovsd, FSxBX, XMM9
1082 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1083
1084;
1085; [V]MOVLPS - load and store forms; the VEX load takes an extra source register.
1086;
1087EMIT_INSTR_PLUS_ICEBP movlps, XMM1, FSxBX
1088EMIT_INSTR_PLUS_ICEBP movlps, FSxBX, XMM1
1089EMIT_INSTR_PLUS_ICEBP vmovlps, XMM1, XMM2, FSxBX
1090EMIT_INSTR_PLUS_ICEBP vmovlps, FSxBX, XMM1
1091 %if TMPL_BITS == 64
1092EMIT_INSTR_PLUS_ICEBP movlps, XMM8, FSxBX
1093EMIT_INSTR_PLUS_ICEBP movlps, FSxBX, XMM11
1094EMIT_INSTR_PLUS_ICEBP vmovlps, XMM10, XMM14, FSxBX
1095EMIT_INSTR_PLUS_ICEBP vmovlps, FSxBX, XMM9
1096 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1097
1098;
1099; [V]MOVLPD - load and store forms; the VEX load takes an extra source register.
1100;
1101EMIT_INSTR_PLUS_ICEBP movlpd, XMM1, FSxBX
1102EMIT_INSTR_PLUS_ICEBP movlpd, FSxBX, XMM1
1103EMIT_INSTR_PLUS_ICEBP vmovlpd, XMM1, XMM2, FSxBX
1104EMIT_INSTR_PLUS_ICEBP vmovlpd, FSxBX, XMM1
1105 %if TMPL_BITS == 64
1106EMIT_INSTR_PLUS_ICEBP movlpd, XMM8, FSxBX
1107EMIT_INSTR_PLUS_ICEBP movlpd, FSxBX, XMM11
1108EMIT_INSTR_PLUS_ICEBP vmovlpd, XMM10, XMM14, FSxBX
1109EMIT_INSTR_PLUS_ICEBP vmovlpd, FSxBX, XMM9
1110 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1111
1112;
1113; [V]MOVHPS - load and store forms; the VEX load takes an extra source register.
1114;
1115EMIT_INSTR_PLUS_ICEBP movhps, XMM1, FSxBX
1116EMIT_INSTR_PLUS_ICEBP movhps, FSxBX, XMM1
1117EMIT_INSTR_PLUS_ICEBP vmovhps, XMM1, XMM2, FSxBX
1118EMIT_INSTR_PLUS_ICEBP vmovhps, FSxBX, XMM1
1119 %if TMPL_BITS == 64
1120EMIT_INSTR_PLUS_ICEBP movhps, XMM8, FSxBX
1121EMIT_INSTR_PLUS_ICEBP movhps, FSxBX, XMM11
1122EMIT_INSTR_PLUS_ICEBP vmovhps, XMM10, XMM14, FSxBX
1123EMIT_INSTR_PLUS_ICEBP vmovhps, FSxBX, XMM9
1124 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1125
1126;
1127; [V]MOVHPD - load and store forms; the VEX load takes an extra source register.
1128;
1129EMIT_INSTR_PLUS_ICEBP movhpd, XMM1, FSxBX
1130EMIT_INSTR_PLUS_ICEBP movhpd, FSxBX, XMM1
1131EMIT_INSTR_PLUS_ICEBP vmovhpd, XMM1, XMM2, FSxBX
1132EMIT_INSTR_PLUS_ICEBP vmovhpd, FSxBX, XMM1
1133 %if TMPL_BITS == 64
1134EMIT_INSTR_PLUS_ICEBP movhpd, XMM8, FSxBX
1135EMIT_INSTR_PLUS_ICEBP movhpd, FSxBX, XMM11
1136EMIT_INSTR_PLUS_ICEBP vmovhpd, XMM10, XMM14, FSxBX
1137EMIT_INSTR_PLUS_ICEBP vmovhpd, FSxBX, XMM9
1138 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1139
1140;
1141; [V]MOVHLPS - register operands only; the VEX form takes three registers.
1142;
1143EMIT_INSTR_PLUS_ICEBP movhlps, XMM1, XMM2
1144EMIT_INSTR_PLUS_ICEBP vmovhlps, XMM1, XMM2, XMM3
1145 %if TMPL_BITS == 64
1146EMIT_INSTR_PLUS_ICEBP movhlps, XMM8, XMM12
1147EMIT_INSTR_PLUS_ICEBP vmovhlps, XMM10, XMM14, XMM12
1148 %endif ; TMPL_BITS == 64 (XMM8 and up variants)
1149
1150;
1151; [V]MOVSLDUP - reg,reg and load (reg, FSxBX) forms; legacy, VEX.128 and VEX.256.
1152;
1153EMIT_INSTR_PLUS_ICEBP movsldup, XMM1, XMM2
1154EMIT_INSTR_PLUS_ICEBP movsldup, XMM1, FSxBX
1155EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM1, XMM2
1156EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM1, FSxBX
1157EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM1, YMM2
1158EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM1, FSxBX
1159 %if TMPL_BITS == 64
1160EMIT_INSTR_PLUS_ICEBP movsldup, XMM8, XMM12
1161EMIT_INSTR_PLUS_ICEBP movsldup, XMM10, FSxBX
1162EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM7, XMM14
1163EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM11, FSxBX
1164EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM12, YMM8
1165EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM12, FSxBX
1166 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1167
1168;
1169; [V]MOVSHDUP - reg,reg and load (reg, FSxBX) forms; legacy, VEX.128 and VEX.256.
1170;
1171EMIT_INSTR_PLUS_ICEBP movshdup, XMM1, XMM2
1172EMIT_INSTR_PLUS_ICEBP movshdup, XMM1, FSxBX
1173EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM1, XMM2
1174EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM1, FSxBX
1175EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM1, YMM2
1176EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM1, FSxBX
1177 %if TMPL_BITS == 64
1178EMIT_INSTR_PLUS_ICEBP movshdup, XMM8, XMM12
1179EMIT_INSTR_PLUS_ICEBP movshdup, XMM10, FSxBX
1180EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM7, XMM14
1181EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM11, FSxBX
1182EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM12, YMM8
1183EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM12, FSxBX
1184 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1185
1186;
1187; [V]MOVDDUP - reg,reg and load (reg, FSxBX) forms; legacy, VEX.128 and VEX.256.
1188;
1189EMIT_INSTR_PLUS_ICEBP movddup, XMM1, XMM2
1190EMIT_INSTR_PLUS_ICEBP movddup, XMM1, FSxBX
1191EMIT_INSTR_PLUS_ICEBP vmovddup, XMM1, XMM2
1192EMIT_INSTR_PLUS_ICEBP vmovddup, XMM1, FSxBX
1193EMIT_INSTR_PLUS_ICEBP vmovddup, YMM1, YMM2
1194EMIT_INSTR_PLUS_ICEBP vmovddup, YMM1, FSxBX
1195 %if TMPL_BITS == 64
1196EMIT_INSTR_PLUS_ICEBP movddup, XMM8, XMM12
1197EMIT_INSTR_PLUS_ICEBP movddup, XMM10, FSxBX
1198EMIT_INSTR_PLUS_ICEBP vmovddup, XMM7, XMM14
1199EMIT_INSTR_PLUS_ICEBP vmovddup, XMM11, FSxBX
1200EMIT_INSTR_PLUS_ICEBP vmovddup, YMM12, YMM8
1201EMIT_INSTR_PLUS_ICEBP vmovddup, YMM12, FSxBX
1202 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1203
1204;
1205; [V]MOVAPS - reg,reg and load (reg, FSxBX) forms only; no store variants here.
1206;
1207EMIT_INSTR_PLUS_ICEBP movaps, XMM1, XMM2
1208EMIT_INSTR_PLUS_ICEBP movaps, XMM1, FSxBX
1209EMIT_INSTR_PLUS_ICEBP vmovaps, XMM1, XMM2
1210EMIT_INSTR_PLUS_ICEBP vmovaps, XMM1, FSxBX
1211EMIT_INSTR_PLUS_ICEBP vmovaps, YMM1, YMM2
1212EMIT_INSTR_PLUS_ICEBP vmovaps, YMM1, FSxBX
1213 %if TMPL_BITS == 64
1214EMIT_INSTR_PLUS_ICEBP movaps, XMM8, XMM12
1215EMIT_INSTR_PLUS_ICEBP movaps, XMM10, FSxBX
1216EMIT_INSTR_PLUS_ICEBP vmovaps, XMM7, XMM14
1217EMIT_INSTR_PLUS_ICEBP vmovaps, XMM11, FSxBX
1218EMIT_INSTR_PLUS_ICEBP vmovaps, YMM12, YMM8
1219EMIT_INSTR_PLUS_ICEBP vmovaps, YMM12, FSxBX
1220 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1221
;
; [V]MOVAPD - reg,reg and load (reg, FSxBX) forms only; no store variants here.
;
1222EMIT_INSTR_PLUS_ICEBP movapd, XMM1, XMM2
1223EMIT_INSTR_PLUS_ICEBP movapd, XMM1, FSxBX
1224EMIT_INSTR_PLUS_ICEBP vmovapd, XMM1, XMM2
1225EMIT_INSTR_PLUS_ICEBP vmovapd, XMM1, FSxBX
1226EMIT_INSTR_PLUS_ICEBP vmovapd, YMM1, YMM2
1227EMIT_INSTR_PLUS_ICEBP vmovapd, YMM1, FSxBX
1228 %if TMPL_BITS == 64
1229EMIT_INSTR_PLUS_ICEBP movapd, XMM8, XMM12
1230EMIT_INSTR_PLUS_ICEBP movapd, XMM10, FSxBX
1231EMIT_INSTR_PLUS_ICEBP vmovapd, XMM7, XMM14
1232EMIT_INSTR_PLUS_ICEBP vmovapd, XMM11, FSxBX
1233EMIT_INSTR_PLUS_ICEBP vmovapd, YMM12, YMM8
1234EMIT_INSTR_PLUS_ICEBP vmovapd, YMM12, FSxBX
1235 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1236
1237;
1238; [V]MOVD - MMX, SSE (XMM) and VEX forms, each moving to and from a 32-bit
;           general register and to and from memory (FSxBX).
1239;
; MMX register forms.
1240EMIT_INSTR_PLUS_ICEBP movd, MM1, EDX
1241EMIT_INSTR_PLUS_ICEBP movd, MM1, FSxBX
1242EMIT_INSTR_PLUS_ICEBP movd, EAX, MM1
1243EMIT_INSTR_PLUS_ICEBP movd, FSxBX, MM1
1244 %if TMPL_BITS == 64
1245EMIT_INSTR_PLUS_ICEBP movd, MM1, R9D
1246EMIT_INSTR_PLUS_ICEBP movd, R10D, MM0
1247 %endif ; TMPL_BITS == 64 (R8D and up general registers)
1248
; XMM register forms, legacy encoding.
1249EMIT_INSTR_PLUS_ICEBP movd, XMM1, EAX
1250EMIT_INSTR_PLUS_ICEBP movd, XMM1, FSxBX
1251EMIT_INSTR_PLUS_ICEBP movd, FSxBX, XMM1
1252EMIT_INSTR_PLUS_ICEBP movd, EAX, XMM1
1253 %if TMPL_BITS == 64
1254EMIT_INSTR_PLUS_ICEBP movd, XMM9, R8D
1255EMIT_INSTR_PLUS_ICEBP movd, R8D, XMM9
1256EMIT_INSTR_PLUS_ICEBP movd, XMM9, FSxBX
1257EMIT_INSTR_PLUS_ICEBP movd, FSxBX, XMM9
1258 %endif ; TMPL_BITS == 64 (XMM9 / R8D variants)
1259
; XMM register forms, VEX encoding.
1260EMIT_INSTR_PLUS_ICEBP vmovd, XMM1, EAX
1261EMIT_INSTR_PLUS_ICEBP vmovd, XMM1, FSxBX
1262EMIT_INSTR_PLUS_ICEBP vmovd, FSxBX, XMM1
1263EMIT_INSTR_PLUS_ICEBP vmovd, EDX, XMM1
1264 %if TMPL_BITS == 64
1265EMIT_INSTR_PLUS_ICEBP vmovd, XMM9, R9D
1266EMIT_INSTR_PLUS_ICEBP vmovd, R8D, XMM9
1267EMIT_INSTR_PLUS_ICEBP vmovd, XMM9, FSxBX
1268EMIT_INSTR_PLUS_ICEBP vmovd, FSxBX, XMM9
1269 %endif ; TMPL_BITS == 64 (XMM9 / R8D, R9D variants)
1270
1271;
1272; [V]MOVQ - some hand coded stuff here as the assembler prefers the 7f/6f variants.
;
; The EMIT_INSTR_PLUS_ICEBP_BYTES lines spell out the 06eh (to MMX/XMM) and
; 07eh (from MMX/XMM) opcode forms byte by byte; the first argument is the
; name suffix, the remaining ones are the encoding:
;   - FSxBX_PFX ...... prefix going with the FSxBX memory operand.
;   - 66h ............ operand size prefix selecting the XMM form.
;   - 48h / 4ch ...... REX.W / REX.W + REX.R (the latter turns reg 1 into reg 9).
;   - 0c4h,0e1h,0f9h . three byte VEX: 0fh map, W=1, 128-bit, 66h prefix.
;   - 0c4h,061h,0f9h . ditto with VEX.R set (stored inverted), i.e. reg 9.
;   - FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT): FSxBX ModRM with reg field = 1.
1273;
; MMX register forms.
1274EMIT_INSTR_PLUS_ICEBP movq, MM1, MM2
1275EMIT_INSTR_PLUS_ICEBP movq, MM1, FSxBX
1276EMIT_INSTR_PLUS_ICEBP movq, FSxBX, MM1
1277 %if TMPL_BITS == 64
1278EMIT_INSTR_PLUS_ICEBP movq, R9, MM1
1279EMIT_INSTR_PLUS_ICEBP movq, MM1, R9
1280EMIT_INSTR_PLUS_ICEBP_BYTES 06e_movq_MM1_FSxBX, FSxBX_PFX, 48h, 0fh, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1281EMIT_INSTR_PLUS_ICEBP_BYTES 07e_movq_FSxBX_MM1, FSxBX_PFX, 48h, 0fh, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1282 %endif ; TMPL_BITS == 64 (64-bit general register and REX.W forms)
1283
; XMM register forms, legacy encoding.
1284EMIT_INSTR_PLUS_ICEBP movq, XMM1, XMM2
1285EMIT_INSTR_PLUS_ICEBP movq, XMM1, FSxBX
1286EMIT_INSTR_PLUS_ICEBP movq, FSxBX, XMM1
1287 %if TMPL_BITS == 64
1288EMIT_INSTR_PLUS_ICEBP movq, XMM9, R8
1289EMIT_INSTR_PLUS_ICEBP movq, R8, XMM9
1290EMIT_INSTR_PLUS_ICEBP movq, XMM9, FSxBX
1291EMIT_INSTR_PLUS_ICEBP movq, FSxBX, XMM9
1292EMIT_INSTR_PLUS_ICEBP_BYTES 06e_movq_XMM1_FSxBX, FSxBX_PFX, 66h, 48h, 0fh, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1293EMIT_INSTR_PLUS_ICEBP_BYTES 06e_movq_XMM9_FSxBX, FSxBX_PFX, 66h, 4ch, 0fh, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1294EMIT_INSTR_PLUS_ICEBP_BYTES 07e_movq_FSxBX_XMM1, FSxBX_PFX, 66h, 48h, 0fh, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1295EMIT_INSTR_PLUS_ICEBP_BYTES 07e_movq_FSxBX_XMM9, FSxBX_PFX, 66h, 4ch, 0fh, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1296 %endif ; TMPL_BITS == 64 (XMM9 / R8 and REX.W forms)
1297
; XMM register forms, VEX encoding.
; NOTE(review): the two VEX.W=1 byte sequences right below are emitted for
;               every TMPL_BITS value, unlike the REX.W ones above - presumably
;               deliberate (W being ignored outside 64-bit mode); confirm
;               against the C test driver.
1298EMIT_INSTR_PLUS_ICEBP vmovq, XMM1, XMM2
1299EMIT_INSTR_PLUS_ICEBP vmovq, XMM1, FSxBX
1300EMIT_INSTR_PLUS_ICEBP_BYTES 06e_vmovq_XMM1_FSxBX, FSxBX_PFX, 0c4h, 0e1h, 0f9h, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1301EMIT_INSTR_PLUS_ICEBP vmovq, FSxBX, XMM1
1302EMIT_INSTR_PLUS_ICEBP_BYTES 07e_vmovq_FSxBX_XMM1, FSxBX_PFX, 0c4h, 0e1h, 0f9h, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1303 %if TMPL_BITS == 64
1304EMIT_INSTR_PLUS_ICEBP vmovq, XMM9, R8
1305EMIT_INSTR_PLUS_ICEBP vmovq, R8, XMM9
1306EMIT_INSTR_PLUS_ICEBP vmovq, XMM9, FSxBX
1307EMIT_INSTR_PLUS_ICEBP vmovq, FSxBX, XMM9
1308EMIT_INSTR_PLUS_ICEBP_BYTES 06e_vmovq_XMM9_FSxBX, FSxBX_PFX, 0c4h, 061h, 0f9h, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1309EMIT_INSTR_PLUS_ICEBP_BYTES 07e_vmovq_FSxBX_XMM9, FSxBX_PFX, 0c4h, 061h, 0f9h, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT)
1310 %endif ; TMPL_BITS == 64 (XMM9 / R8 variants)
1311
1312;
1313; [V]MOVDQU - the 07fh opcode reg,reg encodings are hand coded for some of the pairs.
;
; The assembler picks the 06fh opcode for reg,reg (see the byte listings in
; the trailing comments), so the 07fh encoding of the same move is given as
; raw bytes.  In those, ModRM.reg is the source and ModRM.rm the destination
; (assuming X86_MODRM_MAKE takes mod, reg, rm in that order - TODO confirm).
; 0f3h is the mandatory prefix, 045h is REX.R + REX.B, and 0c5h 0fah / 0feh
; are the two byte VEX prefixes for the 128-bit / 256-bit forms.
1314;
1315EMIT_INSTR_PLUS_ICEBP movdqu, XMM1, XMM2
1316EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqu_XMM1_XMM2, 0f3h, 00fh, 07fh, X86_MODRM_MAKE(3, 2, 1)
1317EMIT_INSTR_PLUS_ICEBP movdqu, XMM1, FSxBX
1318EMIT_INSTR_PLUS_ICEBP movdqu, FSxBX, XMM1
1319EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM1, XMM2 ; C5 FA 6F CA
1320EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqu_XMM1_XMM2, 0c5h, 0fah, 07fh, X86_MODRM_MAKE(3, 2, 1)
1321EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM1, FSxBX
1322EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, XMM1
1323EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM1, YMM2 ; C5 FE 6F CA
1324EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqu_YMM1_YMM2, 0c5h, 0feh, 07fh, X86_MODRM_MAKE(3, 2, 1)
1325EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM1, FSxBX
1326EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, YMM1
1327 %if TMPL_BITS == 64
1328EMIT_INSTR_PLUS_ICEBP movdqu, XMM8, XMM12 ; F3 45 0F 6F C4
1329EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqu_XMM8_XMM12, 0f3h, 045h, 00fh, 07fh, X86_MODRM_MAKE(3, 4, 0)
1330EMIT_INSTR_PLUS_ICEBP movdqu, XMM10, FSxBX
1331EMIT_INSTR_PLUS_ICEBP movdqu, FSxBX, XMM10
1332EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM7, XMM14
1333EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM11, FSxBX
1334EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, XMM11
1335EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM12, YMM8
1336EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM12, FSxBX
1337EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, YMM12
1338 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1339
1340;
1341; [V]MOVDQA - the 07fh opcode reg,reg encodings are hand coded alongside the assembled ones.
;
; The assembler picks the 06fh opcode for reg,reg (see the byte listings in
; the trailing comments), so the 07fh encoding of the same move is given as
; raw bytes.  In those, ModRM.reg is the source and ModRM.rm the destination
; (assuming X86_MODRM_MAKE takes mod, reg, rm in that order - TODO confirm).
; 066h is the mandatory prefix, 045h is REX.R + REX.B, 0c5h 0f9h / 0fdh are
; the two byte VEX prefixes for the 128-bit / 256-bit forms, and
; 0c4h 041h 79h / 7dh the three byte ones with VEX.R and VEX.B set.
1342;
1343EMIT_INSTR_PLUS_ICEBP movdqa, XMM1, XMM2
1344EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqa_XMM1_XMM2, 066h, 00fh, 07fh, X86_MODRM_MAKE(3, 2, 1)
1345EMIT_INSTR_PLUS_ICEBP movdqa, XMM1, FSxBX
1346EMIT_INSTR_PLUS_ICEBP movdqa, FSxBX, XMM1
1347EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM1, XMM2
1348EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_XMM1_XMM2, 0c5h, 0f9h, 07fh, X86_MODRM_MAKE(3, 2, 1)
1349EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM1, FSxBX
1350EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, XMM1
1351EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM1, YMM2
1352EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_YMM1_YMM2, 0c5h, 0fdh, 07fh, X86_MODRM_MAKE(3, 2, 1)
1353EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM1, FSxBX
1354EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, YMM1
1355 %if TMPL_BITS == 64
1356EMIT_INSTR_PLUS_ICEBP movdqa, XMM8, XMM12 ; 66 45 0F 6F C4
1357EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqa_XMM8_XMM12, 066h, 045h, 00fh, 07fh, X86_MODRM_MAKE(3, 4, 0)
1358EMIT_INSTR_PLUS_ICEBP movdqa, XMM10, FSxBX
1359EMIT_INSTR_PLUS_ICEBP movdqa, FSxBX, XMM10
1360EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM8, XMM14 ; C4 41 79 6F C6
1361EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_XMM8_XMM14, 0c4h, 041h, 79h, 07fh, X86_MODRM_MAKE(3, 6, 0)
1362EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM11, FSxBX
1363EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, XMM11
1364EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM12, YMM8
1365EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_YMM12_YMM8, 0c4h, 041h, 7dh, 07fh, X86_MODRM_MAKE(3, 0, 4)
1366EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM12, FSxBX
1367EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, YMM12
1368 %endif ; TMPL_BITS == 64 (variants involving registers 8 and up)
1369
1370;
1371; [V]PTEST - reg,reg and reg,FSxBX forms; legacy, VEX.128 and VEX.256.
1372;
1373EMIT_INSTR_PLUS_ICEBP ptest, XMM1, XMM2
1374EMIT_INSTR_PLUS_ICEBP ptest, XMM1, FSxBX
1375EMIT_INSTR_PLUS_ICEBP vptest, XMM1, XMM2
1376EMIT_INSTR_PLUS_ICEBP vptest, XMM1, FSxBX
1377EMIT_INSTR_PLUS_ICEBP vptest, YMM1, YMM2
1378EMIT_INSTR_PLUS_ICEBP vptest, YMM1, FSxBX
1379 %if TMPL_BITS == 64
1380EMIT_INSTR_PLUS_ICEBP ptest, XMM9, XMM8
1381EMIT_INSTR_PLUS_ICEBP ptest, XMM9, FSxBX
1382EMIT_INSTR_PLUS_ICEBP vptest, XMM9, XMM8
1383EMIT_INSTR_PLUS_ICEBP vptest, XMM9, FSxBX
1384EMIT_INSTR_PLUS_ICEBP vptest, YMM9, YMM8
1385EMIT_INSTR_PLUS_ICEBP vptest, YMM9, FSxBX
1386 %endif ; TMPL_BITS == 64 (register 8 and 9 variants)
1387
1388
1389%endif ; BS3_INSTANTIATING_CMN
1390
1391%include "bs3kit-template-footer.mac" ; reset environment
1392
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette