VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 88638

Last change on this file since 88638 was 87740, checked in by vboxsync, 4 years ago

SUP/Makefile-wrapper.gmk,iprt/asmdefs.mac: Call objtool on our .r0 object when ORCs are roaming freely accross our Linux kernel. Otherwise we won't get any callstacks. bugref:9937

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 81.8 KB
Line 
1; $Id: IEMAllAImpl.asm 87740 2021-02-12 16:36:34Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2020 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20; Header Files ;
21;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28; Defined Constants And Macros ;
29;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30
;;
; Return helper for fastcall functions.
;
; Only 32-bit x86 Windows builds (RT_ARCH_X86 together with RT_OS_WINDOWS)
; get 'ret %1'; every other configuration expands to a plain 'ret'.
;
; @param        1       Operand for 'ret' in the x86 + Windows case.
;
%macro RET_FASTCALL 1
%ifndef RT_ARCH_X86
        ret
%else
 %ifdef RT_OS_WINDOWS
        ret     %1
 %else
        ret
 %endif
%endif
%endmacro
45
;;
; Symbol name for a fastcall function.
;
; The default definition simply forwards to NAME(a_Name).  When both
; RT_ARCH_X86 and RT_OS_WINDOWS are defined it is replaced by a definition
; that pastes together a_Prefix, a_Name, '@' and a_cbArgs.
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;        escaping (or whatever the dollar is good for here).  Thus the ugly
;        prefix argument.
;
; @param        a_Name      The function name (C).
; @param        a_cbArgs    The argument size on x86.
; @param        a_Prefix    Prefix pasted in front of the name (x86/Windows only).
;
%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %undef NAME_FASTCALL
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %endif
%endif
60
;;
; Opens a fastcall function: emits the export/global directives that apply to
; the current object format and then the function label itself.
;
; Note that the directives pass '$@' as the NAME_FASTCALL prefix while the
; label passes '@'.
;
; @param        1       The function name (C).
; @param        2       The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 ; PE objects: export directive.
 %ifdef ASM_FORMAT_PE
  export %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 ; OMF objects, NASM only: export directive.
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
   export NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 ; Everything except flat binary output declares the symbol global.
 %ifndef ASM_FORMAT_BIN
  global NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
81
82
;
; We employ some macro assembly here to hide the calling convention differences.
;
; PROLOGUE_n_ARGS / EPILOGUE_n_ARGS[_EX] bracket a function taking n arguments.
; After the prologue the arguments are available in A0..A3 (with _32/_16/_8
; sub-register aliases) and T0/T1 may be used as temporaries.
;
;   - AMD64: the arguments already are in registers, so the prologues are
;     empty and every epilogue is a plain 'ret'.  The _EX operand is ignored.
;   - x86:   A0/A1 are ecx/edx; A2/A3 are loaded from the stack into ebx/esi by
;     the 3 and 4 argument prologues.  edi (T1), ebx and esi are saved and
;     restored as needed, and the _EX operand is the byte count given to 'ret'.
;
%ifdef RT_ARCH_AMD64
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts an optional (ignored) operand so that the x86 style invocation
 ; 'EPILOGUE_1_ARGS_EX n' also expands here, like the other _EX variants.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro

 ; Argument registers when ASM_CALL64_GCC is defined.
 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 ; Argument registers when ASM_CALL64_MSC is defined.
 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 ; Temporaries.
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

%else
 ; x86
 %macro PROLOGUE_1_ARGS 0
        push    edi                     ; edi is T1
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi                     ; edi is T1
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx
        mov     ebx, [esp + 4 + 4]      ; A2: skip saved ebx + return address
        push    edi
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be removed from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 12 + 4 + 0] ; A2: skip 3 saved regs + return address
        mov     esi, [esp + 12 + 4 + 4] ; A3
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be removed from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro

 %define A0         ecx
 %define A0_32      ecx
 %define A0_16       cx
 %define A0_8        cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16       dx
 %define A1_8        dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16       bx
 %define A2_8        bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16       si

 %define T0         eax
 %define T0_32      eax
 %define T0_16       ax
 %define T0_8        al

 %define T1         edi
 %define T1_32      edi
 %define T1_16       di
%endif
266
267
;;
; Loads the guest's modified and undefined flags from [%1] into the host
; EFLAGS, leaving all other host flag bits as they were.
;
; The load is currently unconditional; the '%if (%3) != 0' guard that would
; restrict it to the case with undefined flags is commented out.
;
; @remarks      Clobbers T0, stack. Changes EFLAGS.
; @param        1       The register (A0..A3 or other) pointing to the eflags.
; @param        2       The set of modified flags.
; @param        3       The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 ;%if (%3) != 0
        pushf                           ; host flags -> stack
        mov     T0_32, [%1]             ; T0 = guest eflags
        and     dword [xSP], ~(%2 | %3) ; host copy: drop the bits we take from the guest
        and     T0_32, (%2 | %3)        ; guest copy: keep only those bits
        or      [xSP], T0               ; combine the two
        popf                            ; and make the result the current flags
 ;%endif
%endmacro
287
;;
; Writes the modified and undefined flags from the current EFLAGS back to the
; guest eflags at [%1]; the remaining guest bits are kept.  Expands to nothing
; when both masks are zero.
;
; @remarks      Clobbers T0, T1, stack.
; @param        1       The register pointing to the EFLAGS.
; @param        2       The mask of modified flags to save.
; @param        3       The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf
        pop     T1                      ; T1 = current flags
        mov     T0_32, [%1]             ; T0 = guest eflags
        and     T0_32, ~(%2 | %3)       ; guest: clear the bits being replaced
        and     T1_32, (%2 | %3)        ; current: keep only the bits being saved
        or      T0_32, T1_32            ; merge
        mov     [%1], T0_32             ; store the updated guest eflags
 %endif
%endmacro
307
308
;;
; Generator for binary operator workers.
;
; Expands to iemAImpl_<mnemonic>_u8, _u16 and _u32, plus _u64 when
; RT_ARCH_AMD64 is defined (32-bit hosts need hand coding for 64-bit
; accesses).  When parameter 2 is non-zero the same set is generated again
; with a _locked suffix and a LOCK prefix on the instruction.
;
; Every worker receives a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
; --- plain variants ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

; --- LOCK prefixed variants ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;            instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
408
409
;;
; Generator for bit test operator workers (four parameter form).
;
; Expands to iemAImpl_<mnemonic>_u16 and _u32, plus _u64 when RT_ARCH_AMD64 is
; defined (32-bit hosts need hand coding for 64-bit accesses).  There is no
; 8-bit variant.  When parameter 2 is non-zero a _locked set with a LOCK
; prefix is generated as well.
;
; Every worker receives a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
; --- plain variants ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

; --- LOCK prefixed variants ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;            instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
486
;;
; Generator for bit search operator workers (three parameter form).
;
; This macro has the same name as the four parameter IEMIMPL_BIT_OP and
; differs from it by parameter count only.
;
; Expands to iemAImpl_<mnemonic>_u16 and _u32, plus _u64 when RT_ARCH_AMD64 is
; defined (32-bit hosts need hand coding for 64-bit accesses).
;
; Every worker receives a pointer to the destination in A0, the source
; operand in A1 and a pointer to eflags in A2.  The instruction result goes
; to T0 and is stored to [A0] only when the instruction leaves ZF clear.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; ZF set: leave the destination alone
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; ZF set: leave the destination alone
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; ZF set: leave the destination alone
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro

;            instr, modified-flags, undefined-flags.
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
539
540
;
; Two operand IMUL.  Similar to the binary operators, yet different: there is
; no locked form and the multiplication is done in a register.
; The rDX:rAX variant of imul is handled together with mul further down.
;
; A0 points to the destination value, A1 holds the other factor and A2 points
; to eflags.  The product is formed in A1 and then stored to [A0].
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]
        mov     [A0], A1_16             ; write the product back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16

BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]
        mov     [A0], A1_32             ; write the product back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]
        mov     [A0], A1                ; write the product back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64
%endif ; RT_ARCH_AMD64
574
575
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Two arguments: A0 points to the memory operand, A1 points to the register
; value.  The register value is loaded into T0, exchanged with [A0], and the
; value obtained from memory is written back through A1.  Returns void.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; fetch register value
        xchg    [A0], T0_8              ; swap with memory
        mov     [A1], T0_8              ; hand back the old memory value
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8

BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        xchg    [A0], T0_16
        mov     [A1], T0_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16

BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        xchg    [A0], T0_32
        mov     [A1], T0_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        xchg    [A0], T0
        mov     [A1], T0
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64
%endif
616
617
;
; XADD for memory operands, plain and LOCK prefixed.
;
; Three arguments: A0 points to the memory/register destination, A1 points to
; the register operand and A2 points to eflags.  The register value is loaded
; into T0, xadd'ed against [A0], and whatever xadd leaves in T0 is written
; back through A1.  Returns void.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8

BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16

BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64
%endif ; RT_ARCH_AMD64

; LOCK prefixed variants.
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64_locked
%endif ; RT_ARCH_AMD64
709
710
;
; CMPXCHG8B.
;
; The instruction pins EDX:EAX and ECX:EBX, which collide with the argument
; registers, so the body is written out separately for each calling
; convention instead of using the A0..A3 abstraction.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
;                                             uint32_t *pEFlags));
;
; Note! Identical to iemAImpl_cmpxchg16b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu64Dst, rdx=pu64EaxEdx, r8=pu64EbxEcx, r9=pEFlags.
        push    rbx

        mov     r11, rdx                ; r11 = pu64EaxEdx (r11 is also T1)
        mov     r10, rcx                ; r10 = pu64Dst

        mov     ebx, [r8]               ; ecx:ebx = replacement value
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; uses T0 (eax), so load eax afterwards
        mov     eax, [r11]              ; edx:eax = comparand
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        mov     [r11], eax              ; return edx:eax to the caller
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; uses T0+T1 (eax, r11), r11 is free by now

        pop     rbx
        ret
 %else
        ; In: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
        push    rbx

        mov     r10, rcx                ; r10 = pEFlags
        mov     r11, rdx                ; r11 = pu64EbxEcx (r11 is also T1)

        mov     ebx, [r11]              ; ecx:ebx = replacement value
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; uses T0 (eax), so load eax afterwards
        mov     eax, [rsi]              ; edx:eax = comparand
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax              ; return edx:eax to the caller
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; uses T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ; In: ecx=pu64Dst, edx=pu64EaxEdx, stack: pu64EbxEcx, pEFlags.
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; edi = pu64Dst
        mov     esi, edx                ; esi = pu64EaxEdx
        mov     ecx, [esp + 16 + 4 + 0] ; ecx = pu64EbxEcx (past 4 pushes + return address)
        mov     ebp, [esp + 16 + 4 + 4] ; ebp = pEFlags

        mov     ebx, [ecx]              ; ecx:ebx = replacement value
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; uses T0 (eax), so load eax afterwards
        mov     eax, [esi]              ; edx:eax = comparand
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax              ; return edx:eax to the caller
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; uses T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b
800
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; iemAImpl_cmpxchg8b already applies the lock prefix, so the locked
        ; entry point just tail-jumps to it.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
805
%ifdef RT_ARCH_AMD64

;
; CMPXCHG16B.
;
; These are tricky register wise, so the code is duplicated for each calling
; convention.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu1284RaxRdx, PRTUINT128U pu128RbxRcx,
;                                              uint32_t *pEFlags));
;
; RDX:RAX is loaded from pu1284RaxRdx and RCX:RBX from pu128RbxRcx before the
; instruction; RDX:RAX is written back to pu1284RaxRdx afterwards and ZF is
; saved to *pEFlags.
;
; Note! Identical to iemAImpl_cmpxchg8b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu128Dst, rdx=pu1284RaxRdx, r8=pu128RbxRcx, r9=pEFlags.
        push    rbx

        mov     r11, rdx                ; pu64RaxRdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     rbx, [r8]
        mov     rcx, [r8 + 8]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [r11]
        mov     rdx, [r11 + 8]

        lock cmpxchg16b [r10]

        mov     [r11], rax
        mov     [r11 + 8], rdx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi=pu128Dst, rsi=pu1284RaxRdx, rdx=pu128RbxRcx, rcx=pEFlags.
        push    rbx

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64RbxRcx (is also T1)

        mov     rbx, [r11]
        mov     rcx, [r11 + 8]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [rsi]
        mov     rdx, [rsi + 8]

        lock cmpxchg16b [rdi]

        ; Store the full 64-bit halves; 32-bit stores would truncate the
        ; value returned in RDX:RAX.
        mov     [rsi], rax
        mov     [rsi + 8], rdx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
ENDPROC iemAImpl_cmpxchg16b

BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg16b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
ENDPROC iemAImpl_cmpxchg16b_locked

%endif ; RT_ARCH_AMD64
874
875
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t puEax, uintX_t uReg, uint32_t *pEFlags));
;
; A0 points to the destination, A1 is dereferenced to load and store the
; accumulator value, A2 holds the replacement value and A3 points to eflags.
; In the 32-bit host version of the _u64 worker the third argument is a
; pointer to the replacement value instead, and cmpxchg8b is used.
;
; @param        1       Instruction prefix: empty or 'lock'.
; @param        2       Function name suffix: empty or '_locked'.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF clear means the compare failed: edx:eax now holds the destination
        ; value while [esi] still holds the old accumulator, so compare those.
        jnz     .cmpxchg8b_not_equal
        ; Equal: comparing a register with itself yields the flags of a
        ; successful compare (ZF set).
        cmp     eax, eax                ; just set the other flags.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        cmp     [esi + 4], edx  ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store                  ; high halves differ: their flags decide
        cmp     [esi], eax              ; high halves equal: low halves must differ
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
973
;;
; Generator for unary operator workers.
;
; For each of the 8, 16 and 32 bit sizes this expands to a plain worker
; immediately followed by its _locked twin (LOCK prefix).  The 64-bit pair is
; generated only when RT_ARCH_AMD64 is defined; 32-bit hosts need hand coding
; for 64-bit accesses.
;
; Every worker receives a pointer to the destination memory operand in A0 and
; a pointer to eflags in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
; 8-bit
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

; 16-bit
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

; 32-bit
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
; 64-bit
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif ; RT_ARCH_AMD64

%endmacro

;              instr, modified-flags, undefined-flags.
IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1062
1063
;;
; Generator for memory fence workers.
;
; Expands to iemAImpl_<instruction>, a function that executes the given
; instruction and returns.  No operands, no return value.
;
; @param        1       The instruction.
;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEM_FENCE lfence
IEMIMPL_MEM_FENCE sfence
IEMIMPL_MEM_FENCE mfence
1082
;;
; Alternative memory fence for non-SSE2 hosts.
;
; Pushes xAX, exchanges it with the stack slot just written and then drops
; the slot again, so xAX and xSP are unchanged on return.
;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        push    xAX                     ; create a scratch stack slot
        xchg    xAX, [xSP]              ; xchg with a memory operand
        add     xSP, xCB                ; discard the scratch slot
        ret
ENDPROC iemAImpl_alt_mem_fence
1092
1093
1094
1095;;
1096; Macro for implementing a shift operation.
1097;
1098; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1099; 32-bit systems where the 64-bit accesses require hand coding.
1100;
1101; All the functions take a pointer to the destination memory operand in A0,
1102; the shift count in A1 and a pointer to eflags in A2.
1103;
; Every worker follows the same pattern: prologue, IEM_MAYBE_LOAD_FLAGS, the
; instruction with CL as count, IEM_SAVE_FLAGS, epilogue.  Getting the count
; into CL differs per calling convention:
;   - ASM_CALL64_GCC: the low byte of A1 (A1_8) is copied into cl.
;   - otherwise:      A0 and A1 are swapped, so the destination pointer ends
;                     up in A1 and the count in A0.  The instruction still
;                     names cl, so this relies on A0 being xCX there (one of
;                     the ASSUMPTIONS mentioned below).
;
1104; @param 1 The instruction mnemonic.
1105; @param 2 The modified flags.
1106; @param 3 The undefined flags.
1107;
1108; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1109;
1110%macro IEMIMPL_SHIFT_OP 3
1111BEGINCODE
1112BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1113 PROLOGUE_3_ARGS
1114 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1115 %ifdef ASM_CALL64_GCC
1116 mov cl, A1_8 ; count -> cl
1117 %1 byte [A0], cl
1118 %else
1119 xchg A1, A0 ; A1 = destination pointer, A0 = count
1120 %1 byte [A1], cl
1121 %endif
1122 IEM_SAVE_FLAGS A2, %2, %3
1123 EPILOGUE_3_ARGS
1124ENDPROC iemAImpl_ %+ %1 %+ _u8
1125
1126BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1127 PROLOGUE_3_ARGS
1128 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1129 %ifdef ASM_CALL64_GCC
1130 mov cl, A1_8 ; count -> cl
1131 %1 word [A0], cl
1132 %else
1133 xchg A1, A0 ; A1 = destination pointer, A0 = count
1134 %1 word [A1], cl
1135 %endif
1136 IEM_SAVE_FLAGS A2, %2, %3
1137 EPILOGUE_3_ARGS
1138ENDPROC iemAImpl_ %+ %1 %+ _u16
1139
1140BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1141 PROLOGUE_3_ARGS
1142 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1143 %ifdef ASM_CALL64_GCC
1144 mov cl, A1_8 ; count -> cl
1145 %1 dword [A0], cl
1146 %else
1147 xchg A1, A0 ; A1 = destination pointer, A0 = count
1148 %1 dword [A1], cl
1149 %endif
1150 IEM_SAVE_FLAGS A2, %2, %3
1151 EPILOGUE_3_ARGS
1152ENDPROC iemAImpl_ %+ %1 %+ _u32
1153
1154 %ifdef RT_ARCH_AMD64
1155BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1156 PROLOGUE_3_ARGS
1157 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1158 %ifdef ASM_CALL64_GCC
1159 mov cl, A1_8 ; count -> cl
1160 %1 qword [A0], cl
1161 %else
1162 xchg A1, A0 ; A1 = destination pointer, A0 = count
1163 %1 qword [A1], cl
1164 %endif
1165 IEM_SAVE_FLAGS A2, %2, %3
1166 EPILOGUE_3_ARGS
1167ENDPROC iemAImpl_ %+ %1 %+ _u64
1168 %endif ; RT_ARCH_AMD64
1169
1170%endmacro
1171
1172IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1173IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1174IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1175IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1176IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1177IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1178IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1179
1180
1181;;
1182; Macro for implementing a double precision shift operation.
1183;
1184; This will generate code for the 16, 32 and 64 bit accesses, except on
1185; 32-bit systems where the 64-bit accesses require hand coding.
1186;
1187; The functions take the destination operand (r/m) in A0, the source (reg) in
1188; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1189;
; The instruction takes its count in CL, so the count is moved by register
; swaps:
;   - ASM_CALL64_GCC: A3 and A2 are swapped around the instruction and swapped
;                     back afterwards, so A3 again holds the eflags pointer
;                     when IEM_SAVE_FLAGS runs.  XCHG between registers does
;                     not alter EFLAGS, so the result flags survive.
;   - otherwise:      A0 and A2 are swapped once; the destination pointer is
;                     then in A2 and the count in A0.
; Both variants name cl explicitly, so they rely on the swapped-in register
; being xCX (see ASSUMPTIONS below).
;
1190; @param 1 The instruction mnemonic.
1191; @param 2 The modified flags.
1192; @param 3 The undefined flags.
1193;
1194; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1195;
1196%macro IEMIMPL_SHIFT_DBL_OP 3
1197BEGINCODE
1198BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1199 PROLOGUE_4_ARGS
1200 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1201 %ifdef ASM_CALL64_GCC
1202 xchg A3, A2 ; count -> A3, eflags pointer -> A2
1203 %1 [A0], A1_16, cl
1204 xchg A3, A2 ; restore A2/A3 for IEM_SAVE_FLAGS
1205 %else
1206 xchg A0, A2 ; count -> A0, destination pointer -> A2
1207 %1 [A2], A1_16, cl
1208 %endif
1209 IEM_SAVE_FLAGS A3, %2, %3
1210 EPILOGUE_4_ARGS
1211ENDPROC iemAImpl_ %+ %1 %+ _u16
1212
1213BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1214 PROLOGUE_4_ARGS
1215 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1216 %ifdef ASM_CALL64_GCC
1217 xchg A3, A2 ; count -> A3, eflags pointer -> A2
1218 %1 [A0], A1_32, cl
1219 xchg A3, A2 ; restore A2/A3 for IEM_SAVE_FLAGS
1220 %else
1221 xchg A0, A2 ; count -> A0, destination pointer -> A2
1222 %1 [A2], A1_32, cl
1223 %endif
1224 IEM_SAVE_FLAGS A3, %2, %3
1225 EPILOGUE_4_ARGS
1226ENDPROC iemAImpl_ %+ %1 %+ _u32
1227
1228 %ifdef RT_ARCH_AMD64
1229BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1230 PROLOGUE_4_ARGS
1231 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1232 %ifdef ASM_CALL64_GCC
1233 xchg A3, A2 ; count -> A3, eflags pointer -> A2
1234 %1 [A0], A1, cl
1235 xchg A3, A2 ; restore A2/A3 for IEM_SAVE_FLAGS
1236 %else
1237 xchg A0, A2 ; count -> A0, destination pointer -> A2
1238 %1 [A2], A1, cl
1239 %endif
1240 IEM_SAVE_FLAGS A3, %2, %3
1241 EPILOGUE_4_ARGS_EX 12
1242ENDPROC iemAImpl_ %+ %1 %+ _u64
1243 %endif ; RT_ARCH_AMD64
1244
1245%endmacro
1246
1247IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1248IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1249
1250
1251;;
1252; Macro for implementing multiplication operations.
1253;
1254; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1255; 32-bit systems where the 64-bit accesses require hand coding.
1256;
1257; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1258; functions take a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1259; pointer to eflags in A3.
1260;
1261; The functions all return 0 so the caller can be used for div/idiv as well as
1262; for the mul/imul implementation.
1263;
; In the non-ASM_CALL64_GCC variants A1 is copied to T1 before the instruction
; runs and the xDX result is stored through T1 afterwards.
; NOTE(review): presumably because A1 is xDX in those conventions and gets
; overwritten by the instruction - confirm against the A* register defines.
;
1264; @param 1 The instruction mnemonic.
1265; @param 2 The modified flags.
1266; @param 3 The undefined flags.
1267;
1268; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1269;
1270%macro IEMIMPL_MUL_OP 3
1271BEGINCODE
1272BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1273 PROLOGUE_3_ARGS
1274 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1275 mov al, [A0]
1276 %1 A1_8
1277 mov [A0], ax ; the 16-bit product goes back to *A0
1278 IEM_SAVE_FLAGS A2, %2, %3
1279 xor eax, eax ; return 0
1280 EPILOGUE_3_ARGS
1281ENDPROC iemAImpl_ %+ %1 %+ _u8
1282
1283BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1284 PROLOGUE_4_ARGS
1285 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1286 mov ax, [A0]
1287 %ifdef ASM_CALL64_GCC
1288 %1 A2_16
1289 mov [A0], ax
1290 mov [A1], dx
1291 %else
1292 mov T1, A1 ; keep the xDX output pointer in T1
1293 %1 A2_16
1294 mov [A0], ax
1295 mov [T1], dx
1296 %endif
1297 IEM_SAVE_FLAGS A3, %2, %3
1298 xor eax, eax ; return 0
1299 EPILOGUE_4_ARGS
1300ENDPROC iemAImpl_ %+ %1 %+ _u16
1301
1302BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1303 PROLOGUE_4_ARGS
1304 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1305 mov eax, [A0]
1306 %ifdef ASM_CALL64_GCC
1307 %1 A2_32
1308 mov [A0], eax
1309 mov [A1], edx
1310 %else
1311 mov T1, A1 ; keep the xDX output pointer in T1
1312 %1 A2_32
1313 mov [A0], eax
1314 mov [T1], edx
1315 %endif
1316 IEM_SAVE_FLAGS A3, %2, %3
1317 xor eax, eax ; return 0
1318 EPILOGUE_4_ARGS
1319ENDPROC iemAImpl_ %+ %1 %+ _u32
1320
1321 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1322BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1323 PROLOGUE_4_ARGS
1324 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1325 mov rax, [A0]
1326 %ifdef ASM_CALL64_GCC
1327 %1 A2
1328 mov [A0], rax
1329 mov [A1], rdx
1330 %else
1331 mov T1, A1 ; keep the xDX output pointer in T1
1332 %1 A2
1333 mov [A0], rax
1334 mov [T1], rdx
1335 %endif
1336 IEM_SAVE_FLAGS A3, %2, %3
1337 xor eax, eax ; return 0
1338 EPILOGUE_4_ARGS_EX 12
1339ENDPROC iemAImpl_ %+ %1 %+ _u64
1340 %endif ; RT_ARCH_AMD64
1341
1342%endmacro
1343
1344IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1345IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1346
1347
1348BEGINCODE
1349;;
1350; Worker function for negating the 64-bit number held in the 32-bit register
; pair T1_32:T0_32 (T1 = high half, T0 = low half).
;
; Computes 0 - value: two zero slots are pushed, swapped with the registers,
; and the saved halves are then subtracted from the zeroed registers with
; borrow propagation.  EFLAGS is modified (sub/sbb/add).
1351; @uses None (T0,T1)
1352BEGINPROC iemAImpl_negate_T0_T1_u32
1353 push 0
1354 push 0
1355 xchg T0_32, [xSP] ; T0 = 0, slot = old low half
1356 xchg T1_32, [xSP + xCB] ; T1 = 0, slot = old high half
1357 sub T0_32, [xSP] ; low = 0 - old low
1358 sbb T1_32, [xSP + xCB] ; high = 0 - old high - borrow
1359 add xSP, xCB*2 ; drop both slots
1360 ret
1361ENDPROC iemAImpl_negate_T0_T1_u32
1362
1363%ifdef RT_ARCH_AMD64
1364;;
1365; Worker function for negating the 128-bit number held in the 64-bit register
; pair T1:T0 (T1 = high half, T0 = low half).
;
; Same scheme as the 32-bit variant: 0 - value via two zeroed stack slots.
; EFLAGS is modified (sub/sbb/add).
1366; @uses None (T0,T1)
1367BEGINPROC iemAImpl_negate_T0_T1_u64
1368 push 0
1369 push 0
1370 xchg T0, [xSP] ; T0 = 0, slot = old low half
1371 xchg T1, [xSP + xCB] ; T1 = 0, slot = old high half
1372 sub T0, [xSP] ; low = 0 - old low
1373 sbb T1, [xSP + xCB] ; high = 0 - old high - borrow
1374 add xSP, xCB*2 ; drop both slots
1375 ret
1376ENDPROC iemAImpl_negate_T0_T1_u64
1377%endif
1378
1379
;;
; Macro for implementing division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit systems where the 64-bit accesses require hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer.  The other
; functions take a pointer to rAX in A0, rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  The instruction is only executed on the host after
; the range checks below have established that it cannot fault.
;
; Signed range check: both operands are turned into magnitudes and compared
; like the unsigned case, shifted by the sign bit.  The most negative dividend
; cannot be negated (neg 0x80..0 = 0x80..0) and its magnitude does not fit the
; narrowed compare that follows the shift, so it is rejected explicitly right
; after the negation; it overflows for every possible divisor.
;
; @param    1       The instruction mnemonic.
; @param    2       The modified flags.
; @param    3       The undefined flags.
; @param    4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
        js      .div_overflow           ; neg 0x8000 = 0x8000: magnitude 32768 always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
        js      .div_overflow           ; neg 0x8000 = 0x8000: magnitude 32768 always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS A2, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
        js      .div_overflow           ; neg 0x80000000 = 0x80000000: always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
        js      .div_overflow           ; neg 0x80000000 = 0x80000000: always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u32)
        test    T1_32, T1_32            ; The helper's flags are not usable, so re-test the sign:
        js      .div_overflow           ;   -(0x8000000000000000) is unrepresentable, always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u32)
        test    T1_32, T1_32
        js      .div_overflow           ; -(0x8000000000000000) is unrepresentable, always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done by the signed range check.
 %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        test    A2, A2
        jz      .div_zero
 %if %4 == 0
        cmp     [A1], A2
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u64)
        test    T1, T1                  ; The helper's flags are not usable, so re-test the sign:
        js      .div_overflow           ;   -(2^127) is unrepresentable, always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u64)
        test    T1, T1
        js      .div_overflow           ; -(2^127) is unrepresentable, always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
 %else
        mov     T1, A1
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done by the signed range check.
 %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1704
1705
1706;
1707; BSWAP. No flag changes.
1708;
1709; Each function takes one argument, pointer to the value to bswap
1710; (input/output). They all return void.
1711;
1712BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
; A0 = pointer to the value (input/output).  A full dword is read and written
; back even though this is the 16-bit variant.
1713 PROLOGUE_1_ARGS
1714 mov T0_32, [A0] ; just in case any of the upper bits are used.
1715 db 66h ; operand-size prefix for the BSWAP below
; NOTE(review): presumably the prefix is emitted by hand so the host CPU itself
; produces the result of the 16-bit BSWAP form - confirm.
1716 bswap T0_32
1717 mov [A0], T0_32
1718 EPILOGUE_1_ARGS
1719ENDPROC iemAImpl_bswap_u16
1720
1721BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
; A0 = pointer to the 32-bit value, byte-swapped in place via T0.
1722 PROLOGUE_1_ARGS
1723 mov T0_32, [A0]
1724 bswap T0_32
1725 mov [A0], T0_32
1726 EPILOGUE_1_ARGS
1727ENDPROC iemAImpl_bswap_u32
1728
1729BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
; A0 = pointer to the 64-bit value, byte-swapped in place.
1730%ifdef RT_ARCH_AMD64
1731 PROLOGUE_1_ARGS
1732 mov T0, [A0]
1733 bswap T0
1734 mov [A0], T0
1735 EPILOGUE_1_ARGS
1736%else
; Non-AMD64 build: swap the bytes of each dword and exchange the two halves.
1737 PROLOGUE_1_ARGS
1738 mov T0, [A0] ; T0 = low dword
1739 mov T1, [A0 + 4] ; T1 = high dword
1740 bswap T0
1741 bswap T1
1742 mov [A0 + 4], T0 ; swapped low dword becomes the high dword
1743 mov [A0], T1 ; swapped high dword becomes the low dword
1744 EPILOGUE_1_ARGS
1745%endif
1746ENDPROC iemAImpl_bswap_u64
1747
1748
1749;;
1750; Initialize the FPU for the actual instruction being emulated, this means
1751; loading parts of the guest's control word and status word.
1752;
; The current host FPU environment is stored at [xSP], its FCW and FSW fields
; are patched, and the image is loaded back.  TOP is taken from the host FSW
; just stored, only the condition code bits come from the guest FSW.
; Clobbers T0 and T1.
;
1753; @uses 24 bytes of stack.
; NOTE(review): FNSTENV's 32-bit protected-mode image is 28 bytes according to
; the Intel SDM; the callers in this file reserve 20h bytes - confirm the
; figure above.
1754; @param 1 Expression giving the address of the FXSTATE of the guest.
1755;
1756%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
1757 fnstenv [xSP]
1758
1759 ; FCW - for exception, precision and rounding control.
1760 movzx T0, word [%1 + X86FXSTATE.FCW]
1761 and T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
1762 mov [xSP + X86FSTENV32P.FCW], T0_16
1763
1764 ; FSW - for undefined C0, C1, C2, and C3.
1765 movzx T1, word [%1 + X86FXSTATE.FSW]
1766 and T1, X86_FSW_C_MASK ; guest condition codes only
1767 movzx T0, word [xSP + X86FSTENV32P.FSW]
1768 and T0, X86_FSW_TOP_MASK ; keep only TOP from the stored FSW
1769 or T0, T1
1770 mov [xSP + X86FSTENV32P.FSW], T0_16
1771
1772 fldenv [xSP]
1773%endmacro
1774
1775
1776;;
1777; Need to move this as well somewhere better?
1778;
; Result of an FPU worker: a 10-byte value followed by the status word
; (12 bytes in total).
1779struc IEMFPURESULT
1780 .r80Result resw 5 ; offset 0, 10 bytes
1781 .FSW resw 1 ; offset 10, 2 bytes
1782endstruc
1783
1784
1785;;
1786; Need to move this as well somewhere better?
1787;
; Two-value FPU result: first 10-byte value, status word, second 10-byte value
; (22 bytes in total).
1788struc IEMFPURESULTTWO
1789 .r80Result1 resw 5 ; offset 0, 10 bytes
1790 .FSW resw 1 ; offset 10, 2 bytes
1791 .r80Result2 resw 5 ; offset 12, 10 bytes
1792endstruc
1793
1794
1795;
1796;---------------------- 16-bit signed integer operations ----------------------
1797;
1798
1799
1800;;
1801; Converts a 16-bit signed integer value to a 80-bit one (fpu register).
1802;
1803; @param A0 FPU context (fxsave).
1804; @param A1 Pointer to a IEMFPURESULT for the output.
1805; @param A2 Pointer to the 16-bit signed integer value to convert.
1806;
1807BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
1808 PROLOGUE_3_ARGS
1809 sub xSP, 20h ; scratch for the environment image used by the macro below
1810
1811 fninit
1812 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1813 fild word [A2]
1814
1815 fnstsw word [A1 + IEMFPURESULT.FSW] ; status word produced by the load
1816 fnclex ; clear exception flags before storing the result
1817 fstp tword [A1 + IEMFPURESULT.r80Result]
1818
1819 fninit ; reinitialize the FPU before returning
1820 add xSP, 20h
1821 EPILOGUE_3_ARGS
1822ENDPROC iemAImpl_fild_i16_to_r80
1823
1824
1825;;
1826; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
1827;
1828; @param A0 FPU context (fxsave).
1829; @param A1 Where to return the output FSW.
1830; @param A2 Where to store the 16-bit signed integer value.
1831; @param A3 Pointer to the 80-bit value.
1832;
1833BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
1834 PROLOGUE_4_ARGS
1835 sub xSP, 20h ; scratch for the environment image used by the macro below
1836
1837 fninit
1838 fld tword [A3] ; source value is loaded before the guest FCW is applied
1839 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1840 fistp word [A2]
1841
1842 fnstsw word [A1]
1843
1844 fninit ; reinitialize the FPU before returning
1845 add xSP, 20h
1846 EPILOGUE_4_ARGS
1847ENDPROC iemAImpl_fist_r80_to_i16
1848
1849
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the environment image used by the macro below

        fninit
        fld     tword [A3]              ; source value is loaded before the guest FCW is applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit store: a dword store would write past the
                                        ; destination and range-check against 32 bits.

        fnstsw  word [A1]

        fninit                          ; reinitialize the FPU before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1874
1875
1876;;
1877; FPU instruction working on one 80-bit and one 16-bit signed integer value.
1878;
1879; @param 1 The instruction
1880;
1881; @param A0 FPU context (fxsave).
1882; @param A1 Pointer to a IEMFPURESULT for the output.
1883; @param A2 Pointer to the 80-bit value.
1884; @param A3 Pointer to the 16-bit value.
1885;
1886%macro IEMIMPL_FPU_R80_BY_I16 1
1887BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1888 PROLOGUE_4_ARGS
1889 sub xSP, 20h ; scratch for the environment image used by the macro below
1890
1891 fninit
1892 fld tword [A2] ; the 80-bit operand is loaded first
1893 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1894 %1 word [A3]
1895
1896 fnstsw word [A1 + IEMFPURESULT.FSW]
1897 fnclex ; clear exception flags before storing the result
1898 fstp tword [A1 + IEMFPURESULT.r80Result]
1899
1900 fninit ; reinitialize the FPU before returning
1901 add xSP, 20h
1902 EPILOGUE_4_ARGS
1903ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1904%endmacro
1905
1906IEMIMPL_FPU_R80_BY_I16 fiadd
1907IEMIMPL_FPU_R80_BY_I16 fimul
1908IEMIMPL_FPU_R80_BY_I16 fisub
1909IEMIMPL_FPU_R80_BY_I16 fisubr
1910IEMIMPL_FPU_R80_BY_I16 fidiv
1911IEMIMPL_FPU_R80_BY_I16 fidivr
1912
1913
1914;;
1915; FPU instruction working on one 80-bit and one 16-bit signed integer value,
1916; only returning FSW.
1917;
1918; @param 1 The instruction
1919;
1920; @param A0 FPU context (fxsave).
1921; @param A1 Where to store the output FSW.
1922; @param A2 Pointer to the 80-bit value.
1923; @param A3 Pointer to the 16-bit value.
1924;
1925%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
1926BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1927 PROLOGUE_4_ARGS
1928 sub xSP, 20h ; scratch for the environment image used by the macro below
1929
1930 fninit
1931 fld tword [A2] ; the 80-bit operand is loaded first
1932 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1933 %1 word [A3]
1934
1935 fnstsw word [A1] ; only the status word is returned
1936
1937 fninit ; reinitialize the FPU before returning
1938 add xSP, 20h
1939 EPILOGUE_4_ARGS
1940ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1941%endmacro
1942
1943IEMIMPL_FPU_R80_BY_I16_FSW ficom
1944
1945
1946
1947;
1948;---------------------- 32-bit signed integer operations ----------------------
1949;
1950
1951
1952;;
1953; Converts a 32-bit signed integer value to a 80-bit one (fpu register).
1954;
1955; @param A0 FPU context (fxsave).
1956; @param A1 Pointer to a IEMFPURESULT for the output.
1957; @param A2 Pointer to the 32-bit signed integer value to convert.
1958;
1959BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
1960 PROLOGUE_3_ARGS
1961 sub xSP, 20h ; scratch for the environment image used by the macro below
1962
1963 fninit
1964 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1965 fild dword [A2]
1966
1967 fnstsw word [A1 + IEMFPURESULT.FSW] ; status word produced by the load
1968 fnclex ; clear exception flags before storing the result
1969 fstp tword [A1 + IEMFPURESULT.r80Result]
1970
1971 fninit ; reinitialize the FPU before returning
1972 add xSP, 20h
1973 EPILOGUE_3_ARGS
1974ENDPROC iemAImpl_fild_i32_to_r80
1975
1976
1977;;
1978; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
1979;
1980; @param A0 FPU context (fxsave).
1981; @param A1 Where to return the output FSW.
1982; @param A2 Where to store the 32-bit signed integer value.
1983; @param A3 Pointer to the 80-bit value.
1984;
1985BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
1986 PROLOGUE_4_ARGS
1987 sub xSP, 20h ; scratch for the environment image used by the macro below
1988
1989 fninit
1990 fld tword [A3] ; source value is loaded before the guest FCW is applied
1991 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1992 fistp dword [A2]
1993
1994 fnstsw word [A1]
1995
1996 fninit ; reinitialize the FPU before returning
1997 add xSP, 20h
1998 EPILOGUE_4_ARGS
1999ENDPROC iemAImpl_fist_r80_to_i32
2000
2001
2002;;
2003; Store a 80-bit floating point value (register) as a 32-bit signed integer
2004; (memory) with truncation.
2005;
2006; @param A0 FPU context (fxsave).
2007; @param A1 Where to return the output FSW.
2008; @param A2 Where to store the 32-bit signed integer value.
2009; @param A3 Pointer to the 80-bit value.
2010;
2011BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
2012 PROLOGUE_4_ARGS
2013 sub xSP, 20h ; scratch for the environment image used by the macro below
2014
2015 fninit
2016 fld tword [A3] ; source value is loaded before the guest FCW is applied
2017 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2018 fisttp dword [A2]
2019
2020 fnstsw word [A1]
2021
2022 fninit ; reinitialize the FPU before returning
2023 add xSP, 20h
2024 EPILOGUE_4_ARGS
2025ENDPROC iemAImpl_fistt_r80_to_i32
2026
2027
2028;;
2029; FPU instruction working on one 80-bit and one 32-bit signed integer value.
2030;
2031; @param 1 The instruction
2032;
2033; @param A0 FPU context (fxsave).
2034; @param A1 Pointer to a IEMFPURESULT for the output.
2035; @param A2 Pointer to the 80-bit value.
2036; @param A3 Pointer to the 32-bit value.
2037;
2038%macro IEMIMPL_FPU_R80_BY_I32 1
2039BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2040 PROLOGUE_4_ARGS
2041 sub xSP, 20h ; scratch for the environment image used by the macro below
2042
2043 fninit
2044 fld tword [A2] ; the 80-bit operand is loaded first
2045 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2046 %1 dword [A3]
2047
2048 fnstsw word [A1 + IEMFPURESULT.FSW]
2049 fnclex ; clear exception flags before storing the result
2050 fstp tword [A1 + IEMFPURESULT.r80Result]
2051
2052 fninit ; reinitialize the FPU before returning
2053 add xSP, 20h
2054 EPILOGUE_4_ARGS
2055ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2056%endmacro
2057
2058IEMIMPL_FPU_R80_BY_I32 fiadd
2059IEMIMPL_FPU_R80_BY_I32 fimul
2060IEMIMPL_FPU_R80_BY_I32 fisub
2061IEMIMPL_FPU_R80_BY_I32 fisubr
2062IEMIMPL_FPU_R80_BY_I32 fidiv
2063IEMIMPL_FPU_R80_BY_I32 fidivr
2064
2065
2066;;
2067; FPU instruction working on one 80-bit and one 32-bit signed integer value,
2068; only returning FSW.
2069;
2070; @param 1 The instruction
2071;
2072; @param A0 FPU context (fxsave).
2073; @param A1 Where to store the output FSW.
2074; @param A2 Pointer to the 80-bit value.
2075; @param A3 Pointer to the 32-bit value.
2076;
2077%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
2078BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2079 PROLOGUE_4_ARGS
2080 sub xSP, 20h ; scratch for the environment image used by the macro below
2081
2082 fninit
2083 fld tword [A2] ; the 80-bit operand is loaded first
2084 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2085 %1 dword [A3]
2086
2087 fnstsw word [A1] ; only the status word is returned
2088
2089 fninit ; reinitialize the FPU before returning
2090 add xSP, 20h
2091 EPILOGUE_4_ARGS
2092ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2093%endmacro
2094
2095IEMIMPL_FPU_R80_BY_I32_FSW ficom
2096
2097
2098
2099;
2100;---------------------- 64-bit signed integer operations ----------------------
2101;
2102
2103
2104;;
2105; Converts a 64-bit signed integer value to a 80-bit one (fpu register).
2106;
2107; @param A0 FPU context (fxsave).
2108; @param A1 Pointer to a IEMFPURESULT for the output.
2109; @param A2 Pointer to the 64-bit signed integer value to convert.
2110;
2111BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
2112 PROLOGUE_3_ARGS
2113 sub xSP, 20h ; scratch for the environment image used by the macro below
2114
2115 fninit
2116 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2117 fild qword [A2]
2118
2119 fnstsw word [A1 + IEMFPURESULT.FSW] ; status word produced by the load
2120 fnclex ; clear exception flags before storing the result
2121 fstp tword [A1 + IEMFPURESULT.r80Result]
2122
2123 fninit ; reinitialize the FPU before returning
2124 add xSP, 20h
2125 EPILOGUE_3_ARGS
2126ENDPROC iemAImpl_fild_i64_to_r80
2127
2128
2129;;
2130; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
2131;
2132; @param A0 FPU context (fxsave).
2133; @param A1 Where to return the output FSW.
2134; @param A2 Where to store the 64-bit signed integer value.
2135; @param A3 Pointer to the 80-bit value.
2136;
2137BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
2138 PROLOGUE_4_ARGS
2139 sub xSP, 20h ; scratch for the environment image used by the macro below
2140
2141 fninit
2142 fld tword [A3] ; source value is loaded before the guest FCW is applied
2143 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2144 fistp qword [A2]
2145
2146 fnstsw word [A1]
2147
2148 fninit ; reinitialize the FPU before returning
2149 add xSP, 20h
2150 EPILOGUE_4_ARGS
2151ENDPROC iemAImpl_fist_r80_to_i64
2152
2153
2154;;
2155; Store a 80-bit floating point value (register) as a 64-bit signed integer
2156; (memory) with truncation.
2157;
2158; @param A0 FPU context (fxsave).
2159; @param A1 Where to return the output FSW.
2160; @param A2 Where to store the 64-bit signed integer value.
2161; @param A3 Pointer to the 80-bit value.
2162;
2163BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
2164 PROLOGUE_4_ARGS
2165 sub xSP, 20h ; scratch for the environment image used by the macro below
2166
2167 fninit
2168 fld tword [A3] ; source value is loaded before the guest FCW is applied
2169 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2170 fisttp qword [A2]
2171
2172 fnstsw word [A1]
2173
2174 fninit ; reinitialize the FPU before returning
2175 add xSP, 20h
2176 EPILOGUE_4_ARGS
2177ENDPROC iemAImpl_fistt_r80_to_i64
2178
2179
2180
2181;
2182;---------------------- 32-bit floating point operations ----------------------
2183;
2184
2185;;
2186; Converts a 32-bit floating point value to a 80-bit one (fpu register).
2187;
2188; @param A0 FPU context (fxsave).
2189; @param A1 Pointer to a IEMFPURESULT for the output.
2190; @param A2 Pointer to the 32-bit floating point value to convert.
2191;
2192BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
2193 PROLOGUE_3_ARGS
2194 sub xSP, 20h ; scratch for the environment image used by the macro below
2195
2196 fninit
2197 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2198 fld dword [A2]
2199
2200 fnstsw word [A1 + IEMFPURESULT.FSW] ; status word produced by the load
2201 fnclex ; clear exception flags before storing the result
2202 fstp tword [A1 + IEMFPURESULT.r80Result]
2203
2204 fninit ; reinitialize the FPU before returning
2205 add xSP, 20h
2206 EPILOGUE_3_ARGS
2207ENDPROC iemAImpl_fld_r32_to_r80
2208
2209
2210;;
2211; Store a 80-bit floating point value (register) as a 32-bit one (memory).
2212;
2213; @param A0 FPU context (fxsave).
2214; @param A1 Where to return the output FSW.
2215; @param A2 Where to store the 32-bit value.
2216; @param A3 Pointer to the 80-bit value.
2217;
2218BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
2219 PROLOGUE_4_ARGS
2220 sub xSP, 20h ; scratch for the environment image used by the macro below
2221
2222 fninit
2223 fld tword [A3] ; source value is loaded before the guest FCW is applied
2224 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2225 fst dword [A2] ; non-popping store; the fninit below resets the FPU
2226
2227 fnstsw word [A1]
2228
2229 fninit
2230 add xSP, 20h
2231 EPILOGUE_4_ARGS
2232ENDPROC iemAImpl_fst_r80_to_r32
2233
2234
2235;;
2236; FPU instruction working on one 80-bit and one 32-bit floating point value.
2237;
2238; @param 1 The instruction
2239;
2240; @param A0 FPU context (fxsave).
2241; @param A1 Pointer to a IEMFPURESULT for the output.
2242; @param A2 Pointer to the 80-bit value.
2243; @param A3 Pointer to the 32-bit value.
2244;
2245%macro IEMIMPL_FPU_R80_BY_R32 1
2246BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2247 PROLOGUE_4_ARGS
2248 sub xSP, 20h ; scratch for the environment image used by the macro below
2249
2250 fninit
2251 fld tword [A2] ; the 80-bit operand is loaded first
2252 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2253 %1 dword [A3]
2254
2255 fnstsw word [A1 + IEMFPURESULT.FSW]
2256 fnclex ; clear exception flags before storing the result
2257 fstp tword [A1 + IEMFPURESULT.r80Result]
2258
2259 fninit ; reinitialize the FPU before returning
2260 add xSP, 20h
2261 EPILOGUE_4_ARGS
2262ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2263%endmacro
2264
2265IEMIMPL_FPU_R80_BY_R32 fadd
2266IEMIMPL_FPU_R80_BY_R32 fmul
2267IEMIMPL_FPU_R80_BY_R32 fsub
2268IEMIMPL_FPU_R80_BY_R32 fsubr
2269IEMIMPL_FPU_R80_BY_R32 fdiv
2270IEMIMPL_FPU_R80_BY_R32 fdivr
2271
2272
2273;;
2274; FPU instruction working on one 80-bit and one 32-bit floating point value,
2275; only returning FSW.
2276;
2277; @param 1 The instruction
2278;
2279; @param A0 FPU context (fxsave).
2280; @param A1 Where to store the output FSW.
2281; @param A2 Pointer to the 80-bit value.
2282; @param A3 Pointer to the 32-bit value.
2283;
2284%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
2285BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2286 PROLOGUE_4_ARGS
2287 sub xSP, 20h ; scratch for the environment image used by the macro below
2288
2289 fninit
2290 fld tword [A2] ; the 80-bit operand is loaded first
2291 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2292 %1 dword [A3]
2293
2294 fnstsw word [A1] ; only the status word is returned
2295
2296 fninit ; reinitialize the FPU before returning
2297 add xSP, 20h
2298 EPILOGUE_4_ARGS
2299ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2300%endmacro
2301
2302IEMIMPL_FPU_R80_BY_R32_FSW fcom
2303
2304
2305
2306;
2307;---------------------- 64-bit floating point operations ----------------------
2308;
2309
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the environment image used by the macro below

        fninit                          ; start from an initialized FPU like the sibling loaders do;
                                        ; the macro below keeps TOP from the current FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW] ; status word produced by the load
        fnclex                          ; clear exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; reinitialize the FPU before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2332
2333
2334;;
2335; Store a 80-bit floating point value (register) as a 64-bit one (memory).
2336;
2337; @param A0 FPU context (fxsave).
2338; @param A1 Where to return the output FSW.
2339; @param A2 Where to store the 64-bit value.
2340; @param A3 Pointer to the 80-bit value.
2341;
2342BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
2343 PROLOGUE_4_ARGS
2344 sub xSP, 20h ; scratch for the environment image used by the macro below
2345
2346 fninit
2347 fld tword [A3] ; source value is loaded before the guest FCW is applied
2348 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2349 fst qword [A2] ; non-popping store; the fninit below resets the FPU
2350
2351 fnstsw word [A1]
2352
2353 fninit
2354 add xSP, 20h
2355 EPILOGUE_4_ARGS
2356ENDPROC iemAImpl_fst_r80_to_r64
2357
2358
2359;;
2360; FPU instruction working on one 80-bit and one 64-bit floating point value.
2361;
2362; @param 1 The instruction
2363;
2364; @param A0 FPU context (fxsave).
2365; @param A1 Pointer to a IEMFPURESULT for the output.
2366; @param A2 Pointer to the 80-bit value.
2367; @param A3 Pointer to the 64-bit value.
2368;
2369%macro IEMIMPL_FPU_R80_BY_R64 1
2370BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2371 PROLOGUE_4_ARGS
2372 sub xSP, 20h ; scratch for the environment image used by the macro below
2373
2374 fninit
2375 fld tword [A2] ; the 80-bit operand is loaded first
2376 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2377 %1 qword [A3]
2378
2379 fnstsw word [A1 + IEMFPURESULT.FSW]
2380 fnclex ; clear exception flags before storing the result
2381 fstp tword [A1 + IEMFPURESULT.r80Result]
2382
2383 fninit ; reinitialize the FPU before returning
2384 add xSP, 20h
2385 EPILOGUE_4_ARGS
2386ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2387%endmacro
2388
2389IEMIMPL_FPU_R80_BY_R64 fadd
2390IEMIMPL_FPU_R80_BY_R64 fmul
2391IEMIMPL_FPU_R80_BY_R64 fsub
2392IEMIMPL_FPU_R80_BY_R64 fsubr
2393IEMIMPL_FPU_R80_BY_R64 fdiv
2394IEMIMPL_FPU_R80_BY_R64 fdivr
2395
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value,
; only returning FSW.
;
; Generates iemAImpl_<instruction>_r80_by_r64.  No floating point result is
; stored; only the status word after the instruction is reported.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]              ; operate on ST0 and the 64-bit memory operand

        fnstsw  word  [A1]              ; the status word is the only output

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2426
2427
2428
2429;
2430;---------------------- 80-bit floating point operations ----------------------
2431;
2432
;;
; Loads a 80-bit floating point register value from memory.
;
; The value is pushed onto a freshly initialized host FPU stack with FLD and
; then stored back, together with the status word, into the result structure.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]              ; ST0 = the 80-bit value

        fnstsw  word  [A1 + IEMFPURESULT.FSW] ; status produced by the load
        fnclex                          ; drop pending exceptions so the store below cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2456
2457
;;
; Store a 80-bit floating point register to memory
;
; The register value is pushed onto a freshly initialized host FPU stack and
; written to the destination with FSTP.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A3]              ; ST0 = the register value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]              ; write the 80-bit value and pop

        fnstsw  word  [A1]              ; status produced by the store

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2481
2482
;;
; FPU instruction working on two 80-bit floating point values.
;
; Generates iemAImpl_<instruction>_r80_by_r80.  The second value is pushed
; first so that it ends up in ST1, the first value ends up in ST0.  After the
; instruction the status word and ST0 are written to the result structure.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
; @param    2       The operand list for the instruction.  Pass {} for
;                   instructions that take their operands implicitly.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A3]              ; pushed first, ends up in ST1
        fld     tword [A2]              ; pushed last, ends up in ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop pending exceptions so the store below cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2523
2524
;;
; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
; storing the result in ST1 and popping the stack.
;
; Generates iemAImpl_<instruction>_r80_by_r80.  Because the instruction pops,
; its result is at the top of the stack afterwards and is written out with
; the usual FSTP.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A2]              ; pushed first, ends up in ST1
        fld     tword [A3]              ; pushed last, ends up in ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop pending exceptions so the store below cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2560
2561
;;
; FPU instruction working on two 80-bit floating point values, only
; returning FSW.
;
; Generates iemAImpl_<instruction>_r80_by_r80.  The first value is placed in
; ST0 and the second in ST1; only the status word after the instruction is
; reported.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A3]              ; pushed first, ends up in ST1
        fld     tword [A2]              ; pushed last, ends up in ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word  [A1]              ; the status word is the only output

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2594
2595
;;
; FPU instruction working on two 80-bit floating point values,
; returning FSW and EFLAGS (eax).
;
; Generates iemAImpl_<instruction>_r80_by_r80.  The first value is placed in
; ST0 and the second in ST1.  The flags register as it stands right after the
; instruction is captured with PUSHF/POP and returned in xAX.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A3]              ; pushed first, ends up in ST1
        fld     tword [A2]              ; pushed last, ends up in ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        fnstsw  word  [A1]
        pushf                           ; grab the flags before anything below can change them
        pop     xAX                     ; return value

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2631
2632
;;
; FPU instruction working on one 80-bit floating point value.
;
; Generates iemAImpl_<instruction>_r80.  The value is loaded into ST0, the
; operand-less instruction is executed, and the status word plus the new ST0
; are written to the result structure.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A2]              ; ST0 = the operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop pending exceptions so the store below cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2669
2670
;;
; FPU instruction working on one 80-bit floating point value, only
; returning FSW.
;
; Generates iemAImpl_<instruction>_r80.  The value is loaded into ST0 and the
; operand-less instruction is executed; only the resulting status word is
; reported.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A2]              ; ST0 = the operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1]              ; the status word is the only output

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2701
2702
2703
;;
; FPU instruction loading a 80-bit floating point constant.
;
; Generates iemAImpl_<instruction>.  The instruction pushes its constant onto
; a freshly initialized host FPU stack; the status word and the pushed value
; are then written to the result structure.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop pending exceptions so the store below cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2738
2739
;;
; FPU instruction working on one 80-bit floating point value, outputting two.
;
; Generates iemAImpl_<instruction>_r80_r80.  After the instruction the top of
; the stack (ST0) is stored to r80Result2 and the value below it to
; r80Result1.
;
; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined outside this
;       section; judging by its name it loads FCW and a sanitized FSW from the
;       fxsave area, presumably using the 20h bytes reserved on the stack -
;       confirm against the macro.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                          ; empty host FPU register stack
        fld     tword [A2]              ; ST0 = the operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW]
        fnclex                          ; drop pending exceptions so the store below cannot trap
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2] ; top of stack
        fnclex                          ; the first store may have raised new exceptions
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1] ; value that was below it

        fninit                          ; leave the host FPU in its default state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
2774
2775
2776
2777
2778;---------------------- SSE and MMX Operations ----------------------
2779
;;
; Hooks placed at the start and end of every MMX worker below.  They currently
; expand to nothing.
;
;; @todo what do we need to do for MMX?
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro
%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;;
; Hooks placed at the start and end of every SSE worker below.  They currently
; expand to nothing.
;
;; @todo what do we need to do for SSE?
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro
%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro
2791
2792
;;
; Media instruction working on two full sized registers.
;
; Emits two workers per instruction: iemAImpl_<instruction>_u64 operating on
; MMX registers and iemAImpl_<instruction>_u128 operating on SSE registers.
; Both load the two operands from memory, execute the instruction in host
; registers and write the first operand back.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first media register size operand (input/output).
; @param    A2      Pointer to the second media register size operand (input).
;
%macro IEMIMPL_MEDIA_F2 1
; 64-bit (MMX) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A2]               ; source operand
        movq    mm0, [A1]               ; destination operand
        %1      mm0, mm1
        movq    [A1], mm0               ; write the result back

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

; 128-bit (SSE) variant; unaligned moves, so no alignment is required.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A2]              ; source operand
        movdqu  xmm0, [A1]              ; destination operand
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; write the result back

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F2 pxor
IEMIMPL_MEDIA_F2 pcmpeqb
IEMIMPL_MEDIA_F2 pcmpeqw
IEMIMPL_MEDIA_F2 pcmpeqd
2834
2835
;;
; Media instruction working on one full sized and one half sized register (lower half).
;
; Emits iemAImpl_<instruction>_u128 and, when requested, also
; iemAImpl_<instruction>_u64.  Only half a register's worth of data is read
; from the second operand (32 bits for MMX, 64 bits for SSE).
;
; @param    1       The instruction
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second half sized media register operand (input).
;
%macro IEMIMPL_MEDIA_F1L1 2
 %if %2 != 0
; 64-bit (MMX) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movd    mm1, [A2]               ; half sized source: 32 bits only
        movq    mm0, [A1]               ; full sized destination
        %1      mm0, mm1
        movq    [A1], mm0               ; write the result back

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

; 128-bit (SSE) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movq    xmm1, [A2]              ; half sized source: 64 bits only
        movdqu  xmm0, [A1]              ; full sized destination
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; write the result back

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F1L1 punpcklbw,  1
IEMIMPL_MEDIA_F1L1 punpcklwd,  1
IEMIMPL_MEDIA_F1L1 punpckldq,  1
IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2880
2881
;;
; Media instruction working on one full sized and one half sized register (high half).
;
; Emits iemAImpl_<instruction>_u128 and, when requested, also
; iemAImpl_<instruction>_u64.  The instructions consume the upper half of the
; second operand, so the whole register must be loaded from memory; loading
; only the low half would feed zeros to the instruction.
;
; @param    1       The instruction
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second full sized media register operand, where we
;                   will only use the upper half (input).
;
%macro IEMIMPL_MEDIA_F1H1 2
 %if %2 != 0
; 64-bit (MMX) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; full sized destination
        movq    mm1, [A2]               ; full load, the upper 32 bits are what matters
        %1      mm0, mm1
        movq    [A1], mm0               ; write the result back

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

; 128-bit (SSE) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; full sized destination
        movdqu  xmm1, [A2]              ; full load, the upper 64 bits are what matters
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; write the result back

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

; The high-half unpackers must be built from the F1H1 template.  The F1L1
; template loads only the low half of the source, which leaves the half these
; instructions actually read zeroed.
IEMIMPL_MEDIA_F1H1 punpckhbw,  1
IEMIMPL_MEDIA_F1H1 punpckhwd,  1
IEMIMPL_MEDIA_F1H1 punpckhdq,  1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2927
2928
;
; Shufflers with evil 8-bit immediates.
;

;;
; PSHUFW worker.
;
; The shuffle order is an immediate encoded in the instruction, so it cannot
; be passed in a register.  Instead a table of 256 five byte stubs
; (pshufw + ret), one per immediate value, is emitted after the function and
; the stub matching A3 is called.
;
; @param    A0      FPU context (not referenced here).
; @param    A1      Pointer to the 64-bit destination operand (input/output).
; @param    A2      Pointer to the 64-bit source operand (input).
; @param    A3      The 8-bit immediate selecting the shuffle.
;
BEGINPROC_FASTCALL iemAImpl_pshufw, 16
        PROLOGUE_4_ARGS
        IEMIMPL_MMX_PROLOGUE

        ; Only the low 8 bits of A3 carry the immediate.  Clear the rest so
        ; stale upper register bits cannot send the call outside the table.
        and     A3, 0ffh

        movq    mm0, [A1]
        movq    mm1, [A2]
        lea     T0, [A3 + A3*4]         ; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]
        lea     T1, [T1 + T0]           ; address of the stub for this immediate
        call    T1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_4_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                ; 256*5 == 0x500
dw 0xfaff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw
2958
2959
;;
; SSE shuffle instruction taking an 8-bit immediate.
;
; Generates iemAImpl_<instruction>.  The shuffle order is an immediate
; encoded in the instruction, so a table of 256 six byte stubs
; (instruction + ret), one per immediate value, is emitted after the function
; and the stub matching A3 is called.
;
; @param    1       The instruction
;
; @param    A0      FPU context (not referenced here).
; @param    A1      Pointer to the 128-bit destination operand (input/output).
; @param    A2      Pointer to the 128-bit source operand (input).
; @param    A3      The 8-bit immediate selecting the shuffle.
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
        PROLOGUE_4_ARGS
        IEMIMPL_SSE_PROLOGUE

        ; Only the low 8 bits of A3 carry the immediate.  Clear the rest so
        ; stale upper register bits cannot send the call outside the table.
        and     A3, 0ffh

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        lea     T1, [.imm0 xWrtRIP]
        lea     T0, [A3 + A3*2]         ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
        lea     T1, [T1 + T0*2]         ; address of the stub for this immediate
        call    T1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_4_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
2991
2992
;
; Move byte mask.
;

;;
; PMOVMSKB worker for a 64-bit (MMX) source.
;
; Writes the byte sign mask of the source as a 64-bit value.  The previous
; content of the destination does not influence the result, so it is not
; read.
;
; @param    A0      FPU context (not referenced here).
; @param    A1      Pointer to the 64-bit destination (output).
; @param    A2      Pointer to the 64-bit source operand (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A2]
        pmovmskb T0, mm1                ; overwrites all of T0
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 is only 32 bits wide here; zero the upper half
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u64
3011
;;
; PMOVMSKB worker for a 128-bit (SSE) source.
;
; Writes the byte sign mask of the source as a 64-bit value.  The previous
; content of the destination does not influence the result, so it is not
; read.
;
; @param    A0      FPU context (not referenced here).
; @param    A1      Pointer to the 64-bit destination (output).
; @param    A2      Pointer to the 128-bit source operand (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A2]
        pmovmskb T0, xmm1               ; overwrites all of T0
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 is only 32 bits wide here; zero the upper half
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u128
3026
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette