VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 94169

Last change on this file since 94169 was 94164, checked in by vboxsync, 3 years ago

VMM/IEM: fixed bug in cmpxchg16b worker for gcc targets.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 92.0 KB
Line 
1; $Id: IEMAllAImpl.asm 94164 2022-03-11 09:05:10Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;*********************************************************************************************************************************
20;* Header Files *
21;*********************************************************************************************************************************
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;*********************************************************************************************************************************
28;* Defined Constants And Macros *
29;*********************************************************************************************************************************
30
31;;
32; RET XX / RET wrapper for fastcall.
33;
; @param 1 The number of argument bytes to pop off the stack on return.
;          Only Windows/x86 uses the callee-cleanup 'ret NN' form; all other
;          targets emit a plain RET and ignore the parameter.
;
34%macro RET_FASTCALL 1
35%ifdef RT_ARCH_X86
36 %ifdef RT_OS_WINDOWS
37 ret %1 ; Windows x86 fastcall: callee pops the stack arguments.
38 %else
39 ret ; Other 32-bit hosts: caller cleans up.
40 %endif
41%else
42 ret ; 64-bit: arguments are in registers, nothing to pop.
43%endif
44%endmacro
45
46;;
47; NAME for fastcall functions.
48;
; @param a_Name   The C function name.
; @param a_cbArgs The size (bytes) of the stack arguments on x86; used to
;                 build the decorated '@name@NN' symbol on Windows/x86.
; @param a_Prefix Symbol prefix workaround for yasm (see @todo below).
;
49;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
50; escaping (or whatever the dollar is good for here). Thus the ugly
51; prefix argument.
52;
; Default: the plain platform-mangled name; overridden below for the
; decorated Windows/x86 fastcall form.
53%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
54%ifdef RT_ARCH_X86
55 %ifdef RT_OS_WINDOWS
56 %undef NAME_FASTCALL
57 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
58 %endif
59%endif
60
61;;
62; BEGINPROC for fastcall functions.
63;
64; @param 1 The function name (C).
65; @param 2 The argument size on x86.
66;
; Emits the object-format specific export/global directives for the
; (possibly decorated) fastcall symbol and opens the procedure by defining
; its label.
;
67%macro BEGINPROC_FASTCALL 2
68 %ifdef ASM_FORMAT_PE
69 export %1=NAME_FASTCALL(%1,%2,$@) ; PE: export under the decorated name.
70 %endif
71 %ifdef __NASM__
72 %ifdef ASM_FORMAT_OMF
73 export NAME(%1) NAME_FASTCALL(%1,%2,$@) ; OMF: export alias for the decorated name.
74 %endif
75 %endif
76 %ifndef ASM_FORMAT_BIN
77 global NAME_FASTCALL(%1,%2,$@) ; Make the symbol visible to the linker.
78 %endif
79NAME_FASTCALL(%1,%2,@): ; The actual procedure label.
80%endmacro
81
82
83;
84; We employ some macro assembly here to hide the calling convention differences.
85;
; A0..A3 are the four argument registers and T0..T2 scratch registers; the
; PROLOGUE/EPILOGUE_n_ARGS macros load stack-passed arguments and preserve
; callee-saved registers as each convention requires.
;
86%ifdef RT_ARCH_AMD64
; AMD64: all four arguments arrive in registers, so the prologues are empty
; and the epilogues are plain returns.
87 %macro PROLOGUE_1_ARGS 0
88 %endmacro
89 %macro EPILOGUE_1_ARGS 0
90 ret
91 %endmacro
92 %macro EPILOGUE_1_ARGS_EX 0
93 ret
94 %endmacro
95
96 %macro PROLOGUE_2_ARGS 0
97 %endmacro
98 %macro EPILOGUE_2_ARGS 0
99 ret
100 %endmacro
101 %macro EPILOGUE_2_ARGS_EX 1
102 ret
103 %endmacro
104
105 %macro PROLOGUE_3_ARGS 0
106 %endmacro
107 %macro EPILOGUE_3_ARGS 0
108 ret
109 %endmacro
110 %macro EPILOGUE_3_ARGS_EX 1
111 ret
112 %endmacro
113
114 %macro PROLOGUE_4_ARGS 0
115 %endmacro
116 %macro EPILOGUE_4_ARGS 0
117 ret
118 %endmacro
119 %macro EPILOGUE_4_ARGS_EX 1
120 ret
121 %endmacro
122
; System V AMD64 (gcc targets): args in rdi, rsi, rdx, rcx.
123 %ifdef ASM_CALL64_GCC
124 %define A0 rdi
125 %define A0_32 edi
126 %define A0_16 di
127 %define A0_8 dil
128
129 %define A1 rsi
130 %define A1_32 esi
131 %define A1_16 si
132 %define A1_8 sil
133
134 %define A2 rdx
135 %define A2_32 edx
136 %define A2_16 dx
137 %define A2_8 dl
138
139 %define A3 rcx
140 %define A3_32 ecx
141 %define A3_16 cx
142 %endif
143
; Microsoft x64: args in rcx, rdx, r8, r9.
144 %ifdef ASM_CALL64_MSC
145 %define A0 rcx
146 %define A0_32 ecx
147 %define A0_16 cx
148 %define A0_8 cl
149
150 %define A1 rdx
151 %define A1_32 edx
152 %define A1_16 dx
153 %define A1_8 dl
154
155 %define A2 r8
156 %define A2_32 r8d
157 %define A2_16 r8w
158 %define A2_8 r8b
159
160 %define A3 r9
161 %define A3_32 r9d
162 %define A3_16 r9w
163 %endif
164
; Scratch registers - all volatile in both 64-bit conventions, so no saving
; is needed.
165 %define T0 rax
166 %define T0_32 eax
167 %define T0_16 ax
168 %define T0_8 al
169
170 %define T1 r11
171 %define T1_32 r11d
172 %define T1_16 r11w
173 %define T1_8 r11b
174
175 %define T2 r10 ; only AMD64
176 %define T2_32 r10d
177 %define T2_16 r10w
178 %define T2_8 r10b
179
180%else
181 ; x86
; 32-bit fastcall: A0/A1 in ecx/edx, A2/A3 loaded from the stack into the
; callee-saved ebx/esi, which the prologues/epilogues preserve. T1 (edi) is
; likewise callee-saved and pushed/popped by every prologue/epilogue pair.
182 %macro PROLOGUE_1_ARGS 0
183 push edi
184 %endmacro
185 %macro EPILOGUE_1_ARGS 0
186 pop edi
187 ret 0
188 %endmacro
189 %macro EPILOGUE_1_ARGS_EX 1
190 pop edi
191 ret %1
192 %endmacro
193
194 %macro PROLOGUE_2_ARGS 0
195 push edi
196 %endmacro
197 %macro EPILOGUE_2_ARGS 0
198 pop edi
199 ret 0
200 %endmacro
201 %macro EPILOGUE_2_ARGS_EX 1
202 pop edi
203 ret %1
204 %endmacro
205
206 %macro PROLOGUE_3_ARGS 0
207 push ebx
208 mov ebx, [esp + 4 + 4] ; A2 = 3rd argument (above return addr + saved ebx).
209 push edi
210 %endmacro
211 %macro EPILOGUE_3_ARGS_EX 1
212 %if (%1) < 4
213 %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
214 %endif
215 pop edi
216 pop ebx
217 ret %1
218 %endmacro
219 %macro EPILOGUE_3_ARGS 0
220 EPILOGUE_3_ARGS_EX 4
221 %endmacro
222
223 %macro PROLOGUE_4_ARGS 0
224 push ebx
225 push edi
226 push esi
227 mov ebx, [esp + 12 + 4 + 0] ; A2 = 3rd argument (above 3 saves + return addr).
228 mov esi, [esp + 12 + 4 + 4] ; A3 = 4th argument.
229 %endmacro
230 %macro EPILOGUE_4_ARGS_EX 1
231 %if (%1) < 8
232 %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
233 %endif
234 pop esi
235 pop edi
236 pop ebx
237 ret %1
238 %endmacro
239 %macro EPILOGUE_4_ARGS 0
240 EPILOGUE_4_ARGS_EX 8
241 %endmacro
242
243 %define A0 ecx
244 %define A0_32 ecx
245 %define A0_16 cx
246 %define A0_8 cl
247
248 %define A1 edx
249 %define A1_32 edx
250 %define A1_16 dx
251 %define A1_8 dl
252
253 %define A2 ebx
254 %define A2_32 ebx
255 %define A2_16 bx
256 %define A2_8 bl
257
258 %define A3 esi
259 %define A3_32 esi
260 %define A3_16 si
261
262 %define T0 eax
263 %define T0_32 eax
264 %define T0_16 ax
265 %define T0_8 al
266
267 %define T1 edi
268 %define T1_32 edi
269 %define T1_16 di
270%endif
271
272
273;;
274; Load the relevant flags from [%1] if there are undefined flags (%3).
275;
276; @remarks Clobbers T0, stack. Changes EFLAGS.
277; @param A2 The register pointing to the flags.
278; @param 1 The parameter (A0..A3) pointing to the eflags.
279; @param 2 The set of modified flags.
280; @param 3 The set of undefined flags.
281;
; Merges the guest's modified+undefined flag bits into the host EFLAGS so
; the instruction that follows starts from the guest flag state.
;
282%macro IEM_MAYBE_LOAD_FLAGS 3
283 ;%if (%3) != 0 ; Note: conditional is disabled - flags are currently loaded unconditionally.
284 pushf ; store current flags
285 mov T0_32, [%1] ; load the guest flags
286 and dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
287 and T0_32, (%2 | %3) ; select the modified and undefined flags.
288 or [xSP], T0 ; merge guest flags with host flags.
289 popf ; load the mixed flags.
290 ;%endif
291%endmacro
292
293;;
294; Update the eflags at [%1] with the modified and undefined flag state of the host EFLAGS.
295;
296; @remarks Clobbers T0, T1, stack.
297; @param 1 The register pointing to the EFLAGS.
298; @param 2 The mask of modified flags to save.
299; @param 3 The mask of undefined flags to (maybe) save.
300;
301%macro IEM_SAVE_FLAGS 3
302 %if (%2 | %3) != 0
303 pushf ; T1 = host flags after the emulated instruction.
304 pop T1
305 mov T0_32, [%1] ; flags
306 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags.
307 and T1_32, (%2 | %3) ; select the modified and undefined flags.
308 or T0_32, T1_32 ; combine the flags.
309 mov [%1], T0_32 ; save the flags.
310 %endif
311%endmacro
312
313;;
314; Calculates the new EFLAGS based on the CPU EFLAGS and fixed clear and set bit masks.
315;
316; @remarks Clobbers T0, T1, stack.
317; @param 1 The register pointing to the EFLAGS.
318; @param 2 The mask of modified flags to save.
319; @param 3 Mask of additional flags to always clear.
320; @param 4 Mask of additional flags to always set.
321;
; Like IEM_SAVE_FLAGS, but additionally forces the %3 bits clear and the %4
; bits set in the result regardless of the host flag state.
;
322%macro IEM_SAVE_AND_ADJUST_FLAGS 4
323 %if (%2 | %3 | %4) != 0
324 pushf ; T1 = host flags after the emulated instruction.
325 pop T1
326 mov T0_32, [%1] ; load flags.
327 and T0_32, ~(%2 | %3) ; clear the modified and always cleared flags.
328 and T1_32, (%2) ; select the modified flags.
329 or T0_32, T1_32 ; combine the flags.
330 %if (%4) != 0
331 or T0_32, %4 ; add the always set flags.
332 %endif
333 mov [%1], T0_32 ; save the result.
334 %endif
335%endmacro
336
337;;
338; Calculates the new EFLAGS using fixed clear and set bit masks.
339;
340; @remarks Clobbers T0.
341; @param 1 The register pointing to the EFLAGS.
342; @param 2 Mask of additional flags to always clear.
343; @param 3 Mask of additional flags to always set.
344;
; Unlike IEM_SAVE_FLAGS this does not read the host EFLAGS at all - it only
; applies the constant clear/set masks to the stored guest flags.
;
345%macro IEM_ADJUST_FLAGS 3
346 %if (%2 | %3) != 0
347 mov T0_32, [%1] ; Load flags.
348 %if (%2) != 0
349 and T0_32, ~(%2) ; Remove the always cleared flags.
350 %endif
351 %if (%3) != 0
352 or T0_32, %3 ; Add the always set flags.
353 %endif
354 mov [%1], T0_32 ; Save the result.
355 %endif
356%endmacro
357
358;;
359; Calculates the new EFLAGS using fixed clear and set bit masks, with PF
; taken from the g_afParity lookup table.
360;
361; @remarks Clobbers T0, %4.
362; @param 1 The register pointing to the EFLAGS.
363; @param 2 Mask of additional flags to always clear
364; @param 3 Mask of additional flags to always set.
365; @param 4 The (full) register containing the parity table index. Will be modified!
366;
367%macro IEM_ADJUST_FLAGS_WITH_PARITY 4
368 mov T0_32, [%1] ; Load flags.
369 and T0_32, ~(%2 | X86_EFL_PF) ; Remove PF and the always cleared flags.
370 %if (%3) != 0
371 or T0_32, %3 ; Add the always set flags.
372 %endif
373 and %4, 0xff ; Reduce the index to the low byte of the result value.
374 %ifdef RT_ARCH_AMD64
375 lea T2, [NAME(g_afParity) xWrtRIP] ; RIP-relative table address (needed for 64-bit PIC).
376 or T0_8, [T2 + %4] ; OR in the PF value for this byte.
377 %else
378 or T0_8, [NAME(g_afParity) + %4] ; 32-bit: absolute addressing is fine.
379 %endif
380 mov [%1], T0_32 ; Save the result.
381%endmacro
382
383
384;*********************************************************************************************************************************
385;* External Symbols *
386;*********************************************************************************************************************************
387extern NAME(g_afParity)
388
389
390;;
391; Macro for implementing a binary operator.
392;
393; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
394; variants, except on 32-bit systems where the 64-bit accesses require hand
395; coding.
396;
397; All the functions take a pointer to the destination memory operand in A0,
398; the source register operand in A1 and a pointer to eflags in A2.
399;
400; @param 1 The instruction mnemonic.
401; @param 2 Non-zero if there should be a locked version.
402; @param 3 The modified flags.
403; @param 4 The undefined flags.
404;
405%macro IEMIMPL_BIN_OP 4
406BEGINCODE
407BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
408 PROLOGUE_3_ARGS
409 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
410 %1 byte [A0], A1_8 ; Let the host CPU do the actual work.
411 IEM_SAVE_FLAGS A2, %3, %4
412 EPILOGUE_3_ARGS
413ENDPROC iemAImpl_ %+ %1 %+ _u8
414
415BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
416 PROLOGUE_3_ARGS
417 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
418 %1 word [A0], A1_16
419 IEM_SAVE_FLAGS A2, %3, %4
420 EPILOGUE_3_ARGS
421ENDPROC iemAImpl_ %+ %1 %+ _u16
422
423BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
424 PROLOGUE_3_ARGS
425 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
426 %1 dword [A0], A1_32
427 IEM_SAVE_FLAGS A2, %3, %4
428 EPILOGUE_3_ARGS
429ENDPROC iemAImpl_ %+ %1 %+ _u32
430
431 %ifdef RT_ARCH_AMD64
432BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
433 PROLOGUE_3_ARGS
434 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
435 %1 qword [A0], A1
436 IEM_SAVE_FLAGS A2, %3, %4
437 EPILOGUE_3_ARGS_EX 8
438ENDPROC iemAImpl_ %+ %1 %+ _u64
439 %endif ; RT_ARCH_AMD64
440
441 %if %2 != 0 ; locked versions requested?
442
443BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
444 PROLOGUE_3_ARGS
445 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
446 lock %1 byte [A0], A1_8 ; Same as above, but with a bus lock.
447 IEM_SAVE_FLAGS A2, %3, %4
448 EPILOGUE_3_ARGS
449ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
450
451BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
452 PROLOGUE_3_ARGS
453 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
454 lock %1 word [A0], A1_16
455 IEM_SAVE_FLAGS A2, %3, %4
456 EPILOGUE_3_ARGS
457ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
458
459BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
460 PROLOGUE_3_ARGS
461 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
462 lock %1 dword [A0], A1_32
463 IEM_SAVE_FLAGS A2, %3, %4
464 EPILOGUE_3_ARGS
465ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
466
467 %ifdef RT_ARCH_AMD64
468BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
469 PROLOGUE_3_ARGS
470 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
471 lock %1 qword [A0], A1
472 IEM_SAVE_FLAGS A2, %3, %4
473 EPILOGUE_3_ARGS_EX 8
474ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
475 %endif ; RT_ARCH_AMD64
476 %endif ; locked
477%endmacro
478
479; instr, lock, modified-flags, undefined-flags.
480IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
481IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
482IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
483IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
484IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
485IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
486IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
487IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 ; no locked form: cmp does not write memory.
488IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF ; no locked form: test does not write memory.
489
490
491;;
492; Macro for implementing a bit operator.
493;
494; This will generate code for the 16, 32 and 64 bit accesses with locked
495; variants, except on 32-bit systems where the 64-bit accesses require hand
496; coding.
497;
498; All the functions take a pointer to the destination memory operand in A0,
499; the source register operand (bit index) in A1 and a pointer to eflags in A2.
500;
501; @param 1 The instruction mnemonic.
502; @param 2 Non-zero if there should be a locked version.
503; @param 3 The modified flags.
504; @param 4 The undefined flags.
505;
; Note: no 8-bit variants - the bt/btc/bts/btr instructions have none.
;
506%macro IEMIMPL_BIT_OP 4
507BEGINCODE
508BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
509 PROLOGUE_3_ARGS
510 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
511 %1 word [A0], A1_16
512 IEM_SAVE_FLAGS A2, %3, %4
513 EPILOGUE_3_ARGS
514ENDPROC iemAImpl_ %+ %1 %+ _u16
515
516BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
517 PROLOGUE_3_ARGS
518 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
519 %1 dword [A0], A1_32
520 IEM_SAVE_FLAGS A2, %3, %4
521 EPILOGUE_3_ARGS
522ENDPROC iemAImpl_ %+ %1 %+ _u32
523
524 %ifdef RT_ARCH_AMD64
525BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
526 PROLOGUE_3_ARGS
527 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
528 %1 qword [A0], A1
529 IEM_SAVE_FLAGS A2, %3, %4
530 EPILOGUE_3_ARGS_EX 8
531ENDPROC iemAImpl_ %+ %1 %+ _u64
532 %endif ; RT_ARCH_AMD64
533
534 %if %2 != 0 ; locked versions requested?
535
536BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
537 PROLOGUE_3_ARGS
538 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
539 lock %1 word [A0], A1_16
540 IEM_SAVE_FLAGS A2, %3, %4
541 EPILOGUE_3_ARGS
542ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
543
544BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
545 PROLOGUE_3_ARGS
546 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
547 lock %1 dword [A0], A1_32
548 IEM_SAVE_FLAGS A2, %3, %4
549 EPILOGUE_3_ARGS
550ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
551
552 %ifdef RT_ARCH_AMD64
553BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
554 PROLOGUE_3_ARGS
555 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
556 lock %1 qword [A0], A1
557 IEM_SAVE_FLAGS A2, %3, %4
558 EPILOGUE_3_ARGS_EX 8
559ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
560 %endif ; RT_ARCH_AMD64
561 %endif ; locked
562%endmacro
; instr, lock, modified-flags, undefined-flags.
563IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) ; no locked form: bt does not write memory.
564IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
565IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
566IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
567
568;;
569; Macro for implementing a bit search operator.
570;
571; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
572; systems where the 64-bit accesses require hand coding.
573;
574; All the functions take a pointer to the destination memory operand in A0,
575; the source register operand in A1 and a pointer to eflags in A2.
576;
577; In the ZF case the destination register is 'undefined', however it seems that
578; both AMD and Intel just leave it as is. The undefined EFLAGS differs between
579; AMD and Intel and according to https://www.sandpile.org/x86/flags.htm between
580; Intel microarchitectures. We only implement 'intel' and 'amd' variation with
581; the behaviour of more recent CPUs (Intel 10980X and AMD 3990X).
582;
583; @param 1 The instruction mnemonic.
584; @param 2 The modified flags.
585; @param 3 The undefined flags.
586;
; Generates three flag-behaviour variants per width: generic (host flags),
; _intel (ZF-based fixed flags + PF from parity table) and _amd (only ZF
; modified). The destination is only written when the source was non-zero
; (ZF clear), matching the 'leave as is' behaviour noted above.
;
587%macro IEMIMPL_BIT_OP 3
588BEGINCODE
589BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
590 PROLOGUE_3_ARGS
591 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
592 %1 T0_16, A1_16
593 jz .unchanged_dst ; ZF set: source was zero, leave the destination alone.
594 mov [A0], T0_16
595.unchanged_dst:
596 IEM_SAVE_FLAGS A2, %2, %3
597 EPILOGUE_3_ARGS
598ENDPROC iemAImpl_ %+ %1 %+ _u16
599
600BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _intel, 12
601 PROLOGUE_3_ARGS
602 %1 T1_16, A1_16
603 jz .unchanged_dst
604 mov [A0], T1_16
605 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
606 EPILOGUE_3_ARGS
607.unchanged_dst:
608 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
609 EPILOGUE_3_ARGS
610ENDPROC iemAImpl_ %+ %1 %+ _u16_intel
611
612BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _amd, 12
613 PROLOGUE_3_ARGS
614 %1 T0_16, A1_16
615 jz .unchanged_dst
616 mov [A0], T0_16
617.unchanged_dst:
618 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.
619 EPILOGUE_3_ARGS
620ENDPROC iemAImpl_ %+ %1 %+ _u16_amd
621
622
623BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
624 PROLOGUE_3_ARGS
625 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
626 %1 T0_32, A1_32
627 jz .unchanged_dst
628 mov [A0], T0_32
629.unchanged_dst:
630 IEM_SAVE_FLAGS A2, %2, %3
631 EPILOGUE_3_ARGS
632ENDPROC iemAImpl_ %+ %1 %+ _u32
633
634BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _intel, 12
635 PROLOGUE_3_ARGS
636 %1 T1_32, A1_32
637 jz .unchanged_dst
638 mov [A0], T1_32
639 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
640 EPILOGUE_3_ARGS
641.unchanged_dst:
642 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
643 EPILOGUE_3_ARGS
644ENDPROC iemAImpl_ %+ %1 %+ _u32_intel
645
646BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _amd, 12
647 PROLOGUE_3_ARGS
648 %1 T0_32, A1_32
649 jz .unchanged_dst
650 mov [A0], T0_32
651.unchanged_dst:
652 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.
653 EPILOGUE_3_ARGS
654ENDPROC iemAImpl_ %+ %1 %+ _u32_amd
655
656
657 %ifdef RT_ARCH_AMD64
658
659BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
660 PROLOGUE_3_ARGS
661 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
662 %1 T0, A1
663 jz .unchanged_dst
664 mov [A0], T0
665.unchanged_dst:
666 IEM_SAVE_FLAGS A2, %2, %3
667 EPILOGUE_3_ARGS_EX 8
668ENDPROC iemAImpl_ %+ %1 %+ _u64
669
670BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _intel, 16
671 PROLOGUE_3_ARGS
672 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
673 %1 T1, A1
674 jz .unchanged_dst
675 mov [A0], T1
676 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
677 EPILOGUE_3_ARGS
678.unchanged_dst:
679 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
680 EPILOGUE_3_ARGS
681ENDPROC iemAImpl_ %+ %1 %+ _u64_intel
682
683BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _amd, 16
684 PROLOGUE_3_ARGS
685 %1 T0, A1
686 jz .unchanged_dst
687 mov [A0], T0
688.unchanged_dst:
689 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.
690 EPILOGUE_3_ARGS_EX 8
691ENDPROC iemAImpl_ %+ %1 %+ _u64_amd
692
693 %endif ; RT_ARCH_AMD64
694%endmacro
695
; instr, modified-flags, undefined-flags.
696IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
697IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
698
699
700;
701; IMUL is also a similar but yet different case (no lock, no mem dst).
702; The rDX:rAX variant of imul is handled together with mul further down.
703;
; Note! The _intel and _amd entry points are simple label aliases falling
; through to the shared body - this form of imul behaves the same on both.
; A0 = pointer to the destination operand, A1 = source register value,
; A2 = pointer to eflags.
;
704BEGINCODE
705BEGINPROC_FASTCALL iemAImpl_imul_two_u16_intel, 12
706BEGINPROC_FASTCALL iemAImpl_imul_two_u16_amd, 12
707BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
708 PROLOGUE_3_ARGS
709 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
710 imul A1_16, word [A0] ; two-operand imul: result lands in the register...
711 mov [A0], A1_16 ; ...and is stored back through the destination pointer.
712 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
713 EPILOGUE_3_ARGS
714ENDPROC iemAImpl_imul_two_u16
715
716BEGINPROC_FASTCALL iemAImpl_imul_two_u32_intel, 12
717BEGINPROC_FASTCALL iemAImpl_imul_two_u32_amd, 12
718BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
719 PROLOGUE_3_ARGS
720 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
721 imul A1_32, dword [A0]
722 mov [A0], A1_32
723 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
724 EPILOGUE_3_ARGS
725ENDPROC iemAImpl_imul_two_u32
726
727%ifdef RT_ARCH_AMD64
728BEGINPROC_FASTCALL iemAImpl_imul_two_u64_intel, 16
729BEGINPROC_FASTCALL iemAImpl_imul_two_u64_amd, 16
730BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
731 PROLOGUE_3_ARGS
732 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
733 imul A1, qword [A0]
734 mov [A0], A1
735 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
736 EPILOGUE_3_ARGS_EX 8
737ENDPROC iemAImpl_imul_two_u64
738%endif ; RT_ARCH_AMD64
739
740
741;
742; XCHG for memory operands. This implies locking. No flag changes.
743;
744; Each function takes two arguments, first the pointer to the memory,
745; then the pointer to the register. They all return void.
746;
; The _locked variants rely on xchg's implicit bus lock when a memory
; operand is involved; no explicit LOCK prefix is needed.
;
747BEGINCODE
748BEGINPROC_FASTCALL iemAImpl_xchg_u8_locked, 8
749 PROLOGUE_2_ARGS
750 mov T0_8, [A1] ; T0 = register value.
751 xchg [A0], T0_8 ; Atomically swap with memory.
752 mov [A1], T0_8 ; Hand the old memory value back.
753 EPILOGUE_2_ARGS
754ENDPROC iemAImpl_xchg_u8_locked
755
756BEGINPROC_FASTCALL iemAImpl_xchg_u16_locked, 8
757 PROLOGUE_2_ARGS
758 mov T0_16, [A1]
759 xchg [A0], T0_16
760 mov [A1], T0_16
761 EPILOGUE_2_ARGS
762ENDPROC iemAImpl_xchg_u16_locked
763
764BEGINPROC_FASTCALL iemAImpl_xchg_u32_locked, 8
765 PROLOGUE_2_ARGS
766 mov T0_32, [A1]
767 xchg [A0], T0_32
768 mov [A1], T0_32
769 EPILOGUE_2_ARGS
770ENDPROC iemAImpl_xchg_u32_locked
771
772%ifdef RT_ARCH_AMD64
773BEGINPROC_FASTCALL iemAImpl_xchg_u64_locked, 8
774 PROLOGUE_2_ARGS
775 mov T0, [A1]
776 xchg [A0], T0
777 mov [A1], T0
778 EPILOGUE_2_ARGS
779ENDPROC iemAImpl_xchg_u64_locked
780%endif
781
782; Unlocked variants for fDisregardLock mode.
; These do two plain loads and two plain stores instead of an atomic swap.
783
784BEGINPROC_FASTCALL iemAImpl_xchg_u8_unlocked, 8
785 PROLOGUE_2_ARGS
786 mov T0_8, [A1] ; T0 = register value.
787 mov T1_8, [A0] ; T1 = memory value.
788 mov [A0], T0_8 ; Non-atomic swap of the two.
789 mov [A1], T1_8
790 EPILOGUE_2_ARGS
791ENDPROC iemAImpl_xchg_u8_unlocked
792
793BEGINPROC_FASTCALL iemAImpl_xchg_u16_unlocked, 8
794 PROLOGUE_2_ARGS
795 mov T0_16, [A1]
796 mov T1_16, [A0]
797 mov [A0], T0_16
798 mov [A1], T1_16
799 EPILOGUE_2_ARGS
800ENDPROC iemAImpl_xchg_u16_unlocked
801
802BEGINPROC_FASTCALL iemAImpl_xchg_u32_unlocked, 8
803 PROLOGUE_2_ARGS
804 mov T0_32, [A1]
805 mov T1_32, [A0]
806 mov [A0], T0_32
807 mov [A1], T1_32
808 EPILOGUE_2_ARGS
809ENDPROC iemAImpl_xchg_u32_unlocked
810
811%ifdef RT_ARCH_AMD64
812BEGINPROC_FASTCALL iemAImpl_xchg_u64_unlocked, 8
813 PROLOGUE_2_ARGS
814 mov T0, [A1]
815 mov T1, [A0]
816 mov [A0], T0
817 mov [A1], T1
818 EPILOGUE_2_ARGS
819ENDPROC iemAImpl_xchg_u64_unlocked
820%endif
821
822
823;
824; XADD for memory operands.
825;
826; Each function takes three arguments, first the pointer to the
827; memory/register, then the pointer to the register, and finally a pointer to
828; eflags. They all return void.
829;
; [A0] += [A1] and the previous [A0] value is returned through [A1];
; arithmetic flags (OF/SF/ZF/AF/PF/CF) are updated like ADD.
;
830BEGINCODE
831BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
832 PROLOGUE_3_ARGS
833 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
834 mov T0_8, [A1] ; T0 = addend from the register operand.
835 xadd [A0], T0_8 ; [A0] += T0; T0 = old [A0].
836 mov [A1], T0_8 ; Return the old destination value.
837 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
838 EPILOGUE_3_ARGS
839ENDPROC iemAImpl_xadd_u8
840
841BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
842 PROLOGUE_3_ARGS
843 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
844 mov T0_16, [A1]
845 xadd [A0], T0_16
846 mov [A1], T0_16
847 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
848 EPILOGUE_3_ARGS
849ENDPROC iemAImpl_xadd_u16
850
851BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
852 PROLOGUE_3_ARGS
853 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
854 mov T0_32, [A1]
855 xadd [A0], T0_32
856 mov [A1], T0_32
857 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
858 EPILOGUE_3_ARGS
859ENDPROC iemAImpl_xadd_u32
860
861%ifdef RT_ARCH_AMD64
862BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
863 PROLOGUE_3_ARGS
864 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
865 mov T0, [A1]
866 xadd [A0], T0
867 mov [A1], T0
868 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
869 EPILOGUE_3_ARGS
870ENDPROC iemAImpl_xadd_u64
871%endif ; RT_ARCH_AMD64
872
; Locked variants - same as above but with an explicit LOCK prefix on the
; xadd for atomicity.
873BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
874 PROLOGUE_3_ARGS
875 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
876 mov T0_8, [A1]
877 lock xadd [A0], T0_8
878 mov [A1], T0_8
879 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
880 EPILOGUE_3_ARGS
881ENDPROC iemAImpl_xadd_u8_locked
882
883BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
884 PROLOGUE_3_ARGS
885 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
886 mov T0_16, [A1]
887 lock xadd [A0], T0_16
888 mov [A1], T0_16
889 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
890 EPILOGUE_3_ARGS
891ENDPROC iemAImpl_xadd_u16_locked
892
893BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
894 PROLOGUE_3_ARGS
895 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
896 mov T0_32, [A1]
897 lock xadd [A0], T0_32
898 mov [A1], T0_32
899 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
900 EPILOGUE_3_ARGS
901ENDPROC iemAImpl_xadd_u32_locked
902
903%ifdef RT_ARCH_AMD64
904BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
905 PROLOGUE_3_ARGS
906 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
907 mov T0, [A1]
908 lock xadd [A0], T0
909 mov [A1], T0
910 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
911 EPILOGUE_3_ARGS
912ENDPROC iemAImpl_xadd_u64_locked
913%endif ; RT_ARCH_AMD64
914
915
916;
917; CMPXCHG8B.
918;
919; These are tricky register wise, so the code is duplicated for each calling
920; convention.
921;
922; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
923;
924; C-proto:
925; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
926; uint32_t *pEFlags));
927;
928; Note! Identical to iemAImpl_cmpxchg16b.
929;
; cmpxchg8b uses edx:eax as the comparand (updated with the old memory
; value on mismatch) and ecx:ebx as the exchange value, so the fixed
; registers must be hand-loaded; ebx (and ebp on x86) are callee-saved and
; preserved around the operation. Only ZF is reported back.
;
930BEGINCODE
931BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
932%ifdef RT_ARCH_AMD64
933 %ifdef ASM_CALL64_MSC
934 push rbx ; ebx is callee-saved and needed for the exchange value.
935
936 mov r11, rdx ; pu64EaxEdx (is also T1)
937 mov r10, rcx ; pu64Dst
938
939 mov ebx, [r8] ; ecx:ebx = exchange value (from pu64EbxEcx).
940 mov ecx, [r8 + 4]
941 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
942 mov eax, [r11] ; edx:eax = comparand.
943 mov edx, [r11 + 4]
944
945 lock cmpxchg8b [r10]
946
947 mov [r11], eax ; Write back edx:eax (old value on mismatch).
948 mov [r11 + 4], edx
949 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
950
951 pop rbx
952 ret
953 %else
; System V (gcc) convention: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
954 push rbx
955
956 mov r10, rcx ; pEFlags
957 mov r11, rdx ; pu64EbxEcx (is also T1)
958
959 mov ebx, [r11] ; ecx:ebx = exchange value.
960 mov ecx, [r11 + 4]
961 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
962 mov eax, [rsi] ; edx:eax = comparand.
963 mov edx, [rsi + 4]
964
965 lock cmpxchg8b [rdi]
966
967 mov [rsi], eax
968 mov [rsi + 4], edx
969 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
970
971 pop rbx
972 ret
973
974 %endif
975%else
; 32-bit host: fastcall passes A0/A1 in ecx/edx, the rest on the stack;
; all four callee-saved registers are needed as scratch here.
976 push esi
977 push edi
978 push ebx
979 push ebp
980
981 mov edi, ecx ; pu64Dst
982 mov esi, edx ; pu64EaxEdx
983 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
984 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
985
986 mov ebx, [ecx] ; ecx:ebx = exchange value (ecx reused, so load ebx first).
987 mov ecx, [ecx + 4]
988 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
989 mov eax, [esi] ; edx:eax = comparand.
990 mov edx, [esi + 4]
991
992 lock cmpxchg8b [edi]
993
994 mov [esi], eax
995 mov [esi + 4], edx
996 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
997
998 pop ebp
999 pop ebx
1000 pop edi
1001 pop esi
1002 ret 8 ; Pop the two stack arguments.
1003%endif
1004ENDPROC iemAImpl_cmpxchg8b
1005
1006BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
1007 ; Lazy bird always lock prefixes cmpxchg8b.
; The unlocked worker above already uses LOCK, so just tail-jump to it.
1008 jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
1009ENDPROC iemAImpl_cmpxchg8b_locked
1010
1011%ifdef RT_ARCH_AMD64
1012
1013;
1014; CMPXCHG16B.
1015;
1016; These are tricky register wise, so the code is duplicated for each calling
1017; convention.
1018;
1019; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
1020;
1021; C-proto:
1022; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1023; uint32_t *pEFlags));
1024;
1025; Note! Identical to iemAImpl_cmpxchg8b.
1026;
; 128-bit sibling of cmpxchg8b: rdx:rax is the comparand (updated with the
; old memory value on mismatch), rcx:rbx the exchange value; rbx is
; callee-saved and preserved. Only ZF is reported back.
;
1027BEGINCODE
1028BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
1029 %ifdef ASM_CALL64_MSC
1030 push rbx ; rbx is callee-saved and needed for the exchange value.
1031
1032 mov r11, rdx ; pu64RaxRdx (is also T1)
1033 mov r10, rcx ; pu64Dst
1034
1035 mov rbx, [r8] ; rcx:rbx = exchange value (from pu128RbxRcx).
1036 mov rcx, [r8 + 8]
1037 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1038 mov rax, [r11] ; rdx:rax = comparand.
1039 mov rdx, [r11 + 8]
1040
1041 lock cmpxchg16b [r10]
1042
1043 mov [r11], rax ; Write back rdx:rax (old value on mismatch).
1044 mov [r11 + 8], rdx
1045 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1046
1047 pop rbx
1048 ret
1049 %else
; System V (gcc) convention: rdi=pu128Dst, rsi=pu128RaxRdx, rdx=pu128RbxRcx, rcx=pEFlags.
1050 push rbx
1051
1052 mov r10, rcx ; pEFlags
1053 mov r11, rdx ; pu64RbxRcx (is also T1)
1054
1055 mov rbx, [r11] ; rcx:rbx = exchange value.
1056 mov rcx, [r11 + 8]
1057 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1058 mov rax, [rsi] ; rdx:rax = comparand.
1059 mov rdx, [rsi + 8]
1060
1061 lock cmpxchg16b [rdi]
1062
1063 mov [rsi], rax
1064 mov [rsi + 8], rdx
1065 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1066
1067 pop rbx
1068 ret
1069
1070 %endif
1071ENDPROC iemAImpl_cmpxchg16b
1072
1073BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
1074 ; Lazy bird always lock prefixes cmpxchg16b.
; The unlocked worker above already uses LOCK, so just tail-jump to it.
1075 jmp NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
1076ENDPROC iemAImpl_cmpxchg16b_locked
1077
1078%endif ; RT_ARCH_AMD64
1079
1080
1081;
1082; CMPXCHG.
1083;
1084; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
1085;
1086; C-proto:
1087; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t puEax, uintX_t uReg, uint32_t *pEFlags));
1088;
1089BEGINCODE
%macro IEMIMPL_CMPXCHG 2
;; 8-bit CMPXCHG: %1 = lock prefix or empty, %2 = function name suffix.
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]                ; al = expected value (*puAl)
        %1 cmpxchg [A0], A2_8           ; compare al with *puXDst, exchange on match.
        mov     [A1], al                ; write back al (current dst value on failure).
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

;; 16-bit CMPXCHG, same pattern as the 8-bit variant.
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

;; 32-bit CMPXCHG, same pattern as the 8-bit variant.
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

;; 64-bit CMPXCHG: native on AMD64, emulated via CMPXCHG8B on 32-bit hosts.
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
        ; Hand-rolled 32-bit fastcall: first two args in ecx/edx, rest on the stack.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]              ; ecx:ebx = replacement value (*pu64Reg)
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]              ; edx:eax = expected value (*pu64Rax)
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; NOTE(review): jz routes the ZF=1 (success) case through the re-compare
        ; below while the ZF=0 (failure) case falls into 'cmp eax, eax' which
        ; forces ZF=1 - this looks inverted (jnz expected); verify against the
        ; CMPXCHG8B ZF semantics and the iemAImpl_cmpxchg8b sibling.
        jz .cmpxchg8b_not_equal
        cmp     eax, eax                ; just set the other flags.
.store:
        mov     [esi], eax              ; write back edx:eax for the caller.
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8                       ; pop the two stack args (fastcall callee cleanup).

.cmpxchg8b_not_equal:
        cmp     [esi + 4], edx          ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG
1175
; Instantiate the plain and LOCK-prefixed CMPXCHG worker sets.
IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
1178
1179;;
1180; Macro for implementing a unary operator.
1181;
1182; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
1183; variants, except on 32-bit system where the 64-bit accesses requires hand
1184; coding.
1185;
1186; All the functions takes a pointer to the destination memory operand in A0,
1187; the source register operand in A1 and a pointer to eflags in A2.
1188;
1189; @param 1 The instruction mnemonic.
1190; @param 2 The modified flags.
1191; @param 3 The undefined flags.
1192;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
;; 8-bit plain variant: %1 = mnemonic, %2 = modified flags, %3 = undefined flags.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

;; 8-bit locked variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

;; 16-bit plain variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;; 16-bit locked variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

;; 32-bit plain variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

;; 32-bit locked variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
;; 64-bit variants - only on 64-bit hosts (32-bit hosts hand-code these in C).
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif ; RT_ARCH_AMD64

%endmacro
1262
; Instantiate the unary workers; NOT modifies no flags, NEG additionally sets CF.
IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1267
1268
1269;
1270; BSWAP. No flag changes.
1271;
1272; Each function takes one argument, pointer to the value to bswap
1273; (input/output). They all return void.
1274;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; just in case any of the upper bits are used.
        db 66h                          ; Operand-size prefix: emit the 16-bit BSWAP encoding,
        bswap T0_32                     ; reproducing the CPU's (undefined) 16-bit behaviour.
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1283
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; Load, byte-swap in a register, store back.
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1291
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
%ifdef RT_ARCH_AMD64
        PROLOGUE_1_ARGS
        mov     T0, [A0]                ; Native 64-bit bswap.
        bswap   T0
        mov     [A0], T0
        EPILOGUE_1_ARGS
%else
        PROLOGUE_1_ARGS
        mov     T0, [A0]                ; 32-bit host: swap each half and exchange
        mov     T1, [A0 + 4]            ; their positions.
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0
        mov     [A0], T1
        EPILOGUE_1_ARGS
%endif
ENDPROC iemAImpl_bswap_u64
1310
1311
1312;;
1313; Macro for implementing a shift operation.
1314;
1315; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1316; 32-bit system where the 64-bit accesses requires hand coding.
1317;
1318; All the functions takes a pointer to the destination memory operand in A0,
1319; the shift count in A1 and a pointer to eflags in A2.
1320;
1321; @param 1 The instruction mnemonic.
1322; @param 2 The modified flags.
1323; @param 3 The undefined flags.
1324;
1325; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1326;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
;; 8-bit shift: %1 = mnemonic, %2 = modified flags, %3 = undefined flags.
;; The _intel/_amd entry points are aliases of the plain one.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_intel, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_amd, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8                ; Count must be in cl for the shift instruction.
        %1      byte [A0], cl
 %else
        xchg    A1, A0                  ; MSC: A1 is rdx, A0 is rcx - swap so count lands in cl.
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

;; 16-bit shift, same pattern.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_intel, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_amd, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;; 32-bit shift, same pattern.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_intel, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_amd, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
;; 64-bit shift - 64-bit hosts only (32-bit hosts implement this in C).
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_intel, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_amd, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
 %else
        xchg    A1, A0
        %1      qword [A1], cl
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro
1396
; Instantiate the shift/rotate workers; AF is undefined for the plain shifts.
IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1404
1405
1406;;
1407; Macro for implementing a double precision shift operation.
1408;
1409; This will generate code for the 16, 32 and 64 bit accesses, except on
1410; 32-bit system where the 64-bit accesses requires hand coding.
1411;
1412; The functions takes the destination operand (r/m) in A0, the source (reg) in
1413; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1414;
1415; @param 1 The instruction mnemonic.
1416; @param 2 The modified flags.
1417; @param 3 The undefined flags.
1418;
1419; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1420;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
;; 16-bit double shift: %1 = mnemonic, %2 = modified flags, %3 = undefined flags.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_intel, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_amd, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; GCC: count is in A2 (rdx) - swap it into rcx (cl)...
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; ...and restore pEFlags for IEM_SAVE_FLAGS below.
 %else
        xchg    A0, A2                  ; MSC: count is in A2 (r8) - swap it into rcx (cl).
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;; 32-bit double shift, same pattern.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_intel, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_amd, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
;; 64-bit double shift - 64-bit hosts only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_intel, 20
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_amd, 20
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1, cl
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro
1477
; Instantiate the double precision shift workers.
IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1480
1481
1482;;
1483; Macro for implementing a multiplication operations.
1484;
1485; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1486; 32-bit system where the 64-bit accesses requires hand coding.
1487;
1488; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1489; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1490; pointer to eflags in A3.
1491;
1492; The functions all return 0 so the caller can be used for div/idiv as well as
1493; for the mul/imul implementation.
1494;
1495; @param 1 The instruction mnemonic.
1496; @param 2 The modified flags.
1497; @param 3 The undefined flags.
1498;
1499; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1500;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
;; 8-bit multiply: AL * operand -> AX; %1 = mnemonic, %2/%3 = modified/undefined flags.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_intel, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_amd, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; full 16-bit product goes back into *pu16AX.
        IEM_SAVE_FLAGS A2, %2, %3
        xor     eax, eax                ; return 0 (shared convention with div/idiv).
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

;; 16-bit multiply: AX * operand -> DX:AX, written to *pu16AX / *pu16DX.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_intel, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_amd, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; MSC: A1 is rdx which the multiply clobbers - save it.
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;; 32-bit multiply: EAX * operand -> EDX:EAX, same structure as 16-bit.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_intel, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_amd, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; Save A1 (rdx) before the multiply clobbers edx.
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
;; 64-bit multiply: RAX * operand -> RDX:RAX.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_intel, 20
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_amd, 20
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
 %else
        mov     T1, A1                  ; Save A1 (rdx) before the multiply clobbers rdx.
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro
1582
; Instantiate the multiply workers; SF/ZF/AF/PF are undefined after MUL/IMUL.
IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1585
1586
1587BEGINCODE
1588;;
1589; Worker function for negating a 32-bit number in T1:T0
1590; @uses None (T0,T1)
BEGINPROC iemAImpl_negate_T0_T1_u32
        ; Computes T1:T0 = 0 - T1:T0 using two zeros pushed on the stack,
        ; so no scratch register beyond T0/T1 is needed. EFLAGS are clobbered.
        push    0
        push    0
        xchg    T0_32, [xSP]            ; stack = old T1:T0, T1:T0 = 0.
        xchg    T1_32, [xSP + xCB]
        sub     T0_32, [xSP]            ; 0 - old value, with borrow into the high half.
        sbb     T1_32, [xSP + xCB]
        add     xSP, xCB*2
        ret
ENDPROC iemAImpl_negate_T0_T1_u32
1601
1602%ifdef RT_ARCH_AMD64
1603;;
1604; Worker function for negating a 64-bit number in T1:T0
1605; @uses None (T0,T1)
BEGINPROC iemAImpl_negate_T0_T1_u64
        ; 64-bit twin of iemAImpl_negate_T0_T1_u32: T1:T0 = 0 - T1:T0 via the
        ; stack; no extra registers used, EFLAGS clobbered.
        push    0
        push    0
        xchg    T0, [xSP]
        xchg    T1, [xSP + xCB]
        sub     T0, [xSP]
        sbb     T1, [xSP + xCB]
        add     xSP, xCB*2
        ret
ENDPROC iemAImpl_negate_T0_T1_u64
1616%endif
1617
1618
1619;;
1620; Macro for implementing a division operations.
1621;
1622; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1623; 32-bit system where the 64-bit accesses requires hand coding.
1624;
1625; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1626; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1627; pointer to eflags in A3.
1628;
1629; The functions all return 0 on success and -1 if a divide error should be
1630; raised by the caller.
1631;
1632; @param 1 The instruction mnemonic.
1633; @param 2 The modified flags.
1634; @param 3 The undefined flags.
1635; @param 4 1 if signed, 0 if unsigned.
1636;
1637; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1638;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
;;
; 8-bit divide: AX (in *pu16AX) / divisor (A1_8); quotient -> AL, remainder -> AH.
; %1 = mnemonic, %2 = modified flags, %3 = undefined flags, %4 = 1 if signed.
; Returns 0 on success, -1 when the caller should raise #DE.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_intel, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_amd, 12
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; high byte of dividend must be < divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS A2, %2, %3
        xor     eax, eax                ; return 0 = success.

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; return -1 = raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

;;
; 16-bit divide: DX:AX (via *pu16AX / *pu16DX) / divisor (A2_16).
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_intel, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_amd, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; high word of dividend must be < divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A1]             ; Assemble the 32-bit dividend in T0_32...
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor must survive loading dx (A2 is rdx on GCC).
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; A1 is rdx on MSC - keep the pointer in T1.
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax                ; return 0 = success.

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; return -1 = raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

;;
; 32-bit divide: EDX:EAX (via *pu32EAX / *pu32EDX) / divisor (A2_32).
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_intel, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_amd, 16
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; high dword of dividend must be < divisor.
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we modify it (we out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u32)
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u32)
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor must survive loading edx (A2 is rdx on GCC).
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; A1 is rdx on MSC - keep the pointer in T1.
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax                ; return 0 = success.

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; undo the divisor save from the signed overflow check.
 %endif
.div_zero:
        mov     eax, -1                 ; return -1 = raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
;;
; 64-bit divide: RDX:RAX (via *pu64RAX / *pu64RDX) / divisor (A2).
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_intel, 20
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_amd, 20
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        test    A2, A2
        jz      .div_zero
 %if %4 == 0
        cmp     [A1], A2                ; high qword of dividend must be < divisor.
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we modify it (we out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u64)
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u64)
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor must survive loading rdx (A2 is rdx on GCC).
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
 %else
        mov     T1, A1                  ; A1 is rdx on MSC - keep the pointer in T1.
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax                ; return 0 = success.

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
 %if %4 != 0
        pop     A2                      ; undo the divisor save from the signed overflow check.
 %endif
.div_zero:
        mov     eax, -1                 ; return -1 = raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro
1948
; Instantiate the divide workers; DIV/IDIV leave all arithmetic flags undefined.
IEMIMPL_DIV_OP div, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1951
1952
1953;;
1954; Macro for implementing memory fence operation.
1955;
1956; No return value, no operands or anything.
1957;
1958; @param 1 The instruction.
1959;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
;; Emits iemAImpl_<fence>: executes the fence instruction (%1) and returns.
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro
1967
; Instantiate the three SSE/SSE2 fence workers.
IEMIMPL_MEM_FENCE lfence
IEMIMPL_MEM_FENCE sfence
IEMIMPL_MEM_FENCE mfence
1971
1972;;
1973; Alternative for non-SSE2 host.
1974;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        ; xchg with a memory operand is implicitly LOCKed, giving a full
        ; memory barrier on CPUs without SSE2 fence instructions.
        push    xAX
        xchg    xAX, [xSP]
        add     xSP, xCB
        ret
ENDPROC iemAImpl_alt_mem_fence
1981
1982
1983;;
1984; Initialize the FPU for the actual instruction being emulated, this means
1985; loading parts of the guest's control word and status word.
1986;
1987; @uses 24 bytes of stack.
1988; @param 1 Expression giving the address of the FXSTATE of the guest.
1989;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]                   ; Capture the current FPU environment on the stack.

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK      ; Guest condition-code bits...
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK    ; ...merged with the host's current TOP.
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]                   ; Activate the merged environment.
%endmacro
2008
2009
2010;;
2011; Need to move this as well somewhere better?
2012;
struc IEMFPURESULT
    .r80Result  resw 5                  ; 80-bit (10 byte) extended-precision result.
    .FSW        resw 1                  ; Output FPU status word.
endstruc
2017
2018
2019;;
2020; Need to move this as well somewhere better?
2021;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; First 80-bit extended-precision result.
    .FSW        resw 1                  ; Output FPU status word.
    .r80Result2 resw 5                  ; Second 80-bit extended-precision result.
endstruc
2027
2028
2029;
2030;---------------------- 16-bit signed integer operations ----------------------
2031;
2032
2033
2034;;
; Converts a 16-bit signed integer value to an 80-bit floating point one (fpu register).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 16-bit signed integer value to convert.
2040;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]               ; Load and convert the 16-bit signed integer.

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exceptions so fstp cannot fault.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
2057
2058
2059;;
2060; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
2061;
2062; @param A0 FPU context (fxsave).
2063; @param A1 Where to return the output FSW.
2064; @param A2 Where to store the 16-bit signed integer value.
2065; @param A3 Pointer to the 80-bit value.
2066;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]               ; Convert using the guest rounding mode (FCW.RC).

        fnstsw  word [A1]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
2082
2083
2084;;
2085; Store a 80-bit floating point value (register) as a 16-bit signed integer
2086; (memory) with truncation.
2087;
2088; @param A0 FPU context (fxsave).
2089; @param A1 Where to return the output FSW.
2090; @param A2 Where to store the 16-bit signed integer value.
2091; @param A3 Pointer to the 80-bit value.
2092;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; Truncating convert; was 'dword' which stored
                                        ; 32 bits into the 16-bit destination (buffer
                                        ; overrun + wrong operand width for FISTTP m16int).

        fnstsw  word [A1]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
2108
2109
2110;;
2111; FPU instruction working on one 80-bit and one 16-bit signed integer value.
2112;
2113; @param 1 The instruction
2114;
2115; @param A0 FPU context (fxsave).
2116; @param A1 Pointer to a IEMFPURESULT for the output.
2117; @param A2 Pointer to the 80-bit value.
2118; @param A3 Pointer to the 16-bit value.
2119;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        fld     tword [A2]              ; ST(0) = 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]               ; %1 = fiadd/fimul/... with the 16-bit int operand.

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exceptions so fstp cannot fault.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro
2139
; Instantiate the r80-by-i16 arithmetic workers.
IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
2146
2147
2148;;
2149; FPU instruction working on one 80-bit and one 16-bit signed integer value,
2150; only returning FSW.
2151;
2152; @param 1 The instruction
2153;
2154; @param A0 FPU context (fxsave).
2155; @param A1 Where to store the output FSW.
2156; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 16-bit value.
2158;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        fld     tword [A2]              ; ST(0) = 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]               ; Compare-style op: only FSW matters, no result stored.

        fnstsw  word [A1]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro
2176
IEMIMPL_FPU_R80_BY_I16_FSW ficom        ; FICOM m16int - compare, FSW only.
2178
2179
2180
2181;
2182;---------------------- 32-bit signed integer operations ----------------------
2183;
2184
2185
2186;;
; Converts a 32-bit signed integer value to an 80-bit floating point one (fpu register).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 32-bit signed integer value to convert.
2192;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]              ; Load and convert the 32-bit signed integer.

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exceptions so fstp cannot fault.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
2209
2210
2211;;
2212; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
2213;
2214; @param A0 FPU context (fxsave).
2215; @param A1 Where to return the output FSW.
2216; @param A2 Where to store the 32-bit signed integer value.
2217; @param A3 Pointer to the 80-bit value.
2218;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]              ; Convert using the guest rounding mode (FCW.RC).

        fnstsw  word [A1]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
2234
2235
2236;;
2237; Store a 80-bit floating point value (register) as a 32-bit signed integer
2238; (memory) with truncation.
2239;
2240; @param A0 FPU context (fxsave).
2241; @param A1 Where to return the output FSW.
2242; @param A2 Where to store the 32-bit signed integer value.
2243; @param A3 Pointer to the 80-bit value.
2244;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch for fnstenv/fldenv in the macro below.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]              ; Truncating convert (ignores FCW.RC), SSE3.

        fnstsw  word [A1]

        fninit                          ; Leave the host FPU in a clean state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
2260
2261
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value.
; Emits iemAImpl_<insn>_r80_by_i32 for the given instruction.
;
; @param 1 The instruction (e.g. fiadd).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      dword [A3]                      ; ST0 = ST0 <op> *(int32_t *)A3

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2298
2299
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value,
; only returning FSW (comparison style).
;
; @param 1 The instruction (e.g. ficom).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to store the output FSW.
; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 32-bit value.  (Was documented as 64-bit; the code uses a dword operand.)
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      dword [A3]                      ; compare ST0 against *(int32_t *)A3; sets C0..C3

        fnstsw  word [A1]                       ; return the FSW only

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2330
2331
2332
2333;
2334;---------------------- 64-bit signed integer operations ----------------------
2335;
2336
2337
;;
; Converts a 64-bit signed integer value to an 80-bit floating point one (fpu register).
; (Implements FILD m64int; the header previously mis-described the input as floating point.)
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fild    qword [A2]                      ; ST0 = (80-bit fp) *(int64_t *)A2

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2361
2362
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
; Rounding is controlled by the guest FCW loaded below.
;
; @param A0 FPU context (fxsave).
; @param A1 Where to return the output FSW.
; @param A2 Where to store the 64-bit signed integer value.
; @param A3 Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state before loading the operand
        fld     tword [A3]                      ; ST0 = input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; guest FCW decides rounding
        fistp   qword [A2]                      ; *(int64_t *)A2 = round(ST0); pops ST0

        fnstsw  word [A1]                       ; return the resulting FSW

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2386
2387
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation (FISTTP ignores the FCW rounding mode).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to return the output FSW.
; @param A2 Where to store the 64-bit signed integer value.
; @param A3 Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state before loading the operand
        fld     tword [A3]                      ; ST0 = input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fisttp  qword [A2]                      ; *(int64_t *)A2 = trunc(ST0); pops ST0

        fnstsw  word [A1]                       ; return the resulting FSW

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2412
2413
2414
2415;
2416;---------------------- 32-bit floating point operations ----------------------
2417;
2418
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW (denormal/invalid masks matter)
        fld     dword [A2]                      ; ST0 = widened *(float *)A2

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2442
2443
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to return the output FSW.
; @param A2 Where to store the 32-bit value.
; @param A3 Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state before loading the operand
        fld     tword [A3]                      ; ST0 = input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; guest FCW decides rounding on narrowing
        fst     dword [A2]                      ; *(float *)A2 = ST0 (fst, not fstp - the trailing fninit discards the stack)

        fnstsw  word [A1]                       ; return the resulting FSW

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2467
2468
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value.
; Emits iemAImpl_<insn>_r80_by_r32 for the given instruction.
;
; @param 1 The instruction (e.g. fadd).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      dword [A3]                      ; ST0 = ST0 <op> *(float *)A3

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2505
2506
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value,
; only returning FSW (comparison style).
;
; @param 1 The instruction (e.g. fcom).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to store the output FSW.
; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 32-bit value.  (Was documented as 64-bit; the code uses a dword operand.)
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      dword [A3]                      ; compare ST0 against *(float *)A3; sets C0..C3

        fnstsw  word [A1]                       ; return the FSW only

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2537
2538
2539
2540;
2541;---------------------- 64-bit floating point operations ----------------------
2542;
2543
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state first - was missing here; every sibling
                                                ; loader (fild_i32/i64, fld_r32, fld_r80_from_r80/d80) does this
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fld     qword [A2]                      ; ST0 = widened *(double *)A2

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2566
2567
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to return the output FSW.
; @param A2 Where to store the 64-bit value.
; @param A3 Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state before loading the operand
        fld     tword [A3]                      ; ST0 = input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; guest FCW decides rounding on narrowing
        fst     qword [A2]                      ; *(double *)A2 = ST0 (trailing fninit discards the stack)

        fnstsw  word [A1]                       ; return the resulting FSW

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2591
2592
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value.
; Emits iemAImpl_<insn>_r80_by_r64 for the given instruction.
;
; @param 1 The instruction (e.g. fadd).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      qword [A3]                      ; ST0 = ST0 <op> *(double *)A3

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2629
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value,
; only returning FSW (comparison style).
;
; @param 1 The instruction (e.g. fcom).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to store the output FSW.
; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      qword [A3]                      ; compare ST0 against *(double *)A3; sets C0..C3

        fnstsw  word [A1]                       ; return the FSW only

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2660
2661
2662
2663;
2664;---------------------- 80-bit floating point operations ----------------------
2665;
2666
;;
; Loads a 80-bit floating point register value from memory.
; A same-width load still goes through the FPU so FSW reflects any faults.
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fld     tword [A2]                      ; ST0 = *(long double *)A2

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2690
2691
;;
; Store a 80-bit floating point register to memory
;
; @param A0 FPU context (fxsave).
; @param A1 Where to return the output FSW.
; @param A2 Where to store the 80-bit value.
; @param A3 Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state before loading the operand
        fld     tword [A3]                      ; ST0 = input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fstp    tword [A2]                      ; *(long double *)A2 = ST0; pops (no narrowing, no rounding)

        fnstsw  word [A1]                       ; return the resulting FSW

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2715
2716
;;
; Loads an 80-bit floating point register value in BCD format from memory (FBLD).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 80-bit BCD value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_d80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fbld    tword [A2]                      ; ST0 = BCD-decoded *(d80 *)A2

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_d80
2740
2741
;;
; Store a 80-bit floating point register to memory as BCD (FBSTP).
;
; @param A0 FPU context (fxsave).
; @param A1 Where to return the output FSW.
; @param A2 Where to store the 80-bit BCD value.
; @param A3 Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_d80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state before loading the operand
        fld     tword [A3]                      ; ST0 = input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        fbstp   tword [A2]                      ; *(d80 *)A2 = BCD(ST0); pops ST0

        fnstsw  word [A1]                       ; return the resulting FSW

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_d80
2765
2766
;;
; FPU instruction working on two 80-bit floating point values.
;
; @param 1 The instruction
; @param 2 The operand list to pass to the instruction, e.g. {st0, st1},
;          or {} for instructions with implicit operands (fprem, fscale, ...).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the first 80-bit value (ST0)
; @param A3 Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A3]                      ; push STn operand first...
        fld     tword [A2]                      ; ...so the A2 value ends up in ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      %2                              ; result lands in ST0

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem, {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2807
2808
;;
; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
; storing the result in ST1 and popping the stack.
;
; @param 1 The instruction (fpatan, fyl2x, fyl2xp1 - all implicit-operand).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the first 80-bit value (ST1).
; @param A3 Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; push ST1 operand first...
        fld     tword [A3]                      ; ...then the ST0 operand on top
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1                                      ; computes into ST1 and pops; result is now the only stack entry

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2844
2845
;;
; FPU instruction working on two 80-bit floating point values, only
; returning FSW (comparison style).
;
; @param 1 The instruction (fcom, fucom).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a uint16_t for the resulting FSW.
; @param A2 Pointer to the first 80-bit value.
; @param A3 Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A3]                      ; push second operand first...
        fld     tword [A2]                      ; ...so the A2 value is ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      st0, st1                        ; compare; sets C0..C3 in FSW

        fnstsw  word [A1]                       ; return the FSW only

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2878
2879
;;
; FPU instruction working on two 80-bit floating point values,
; returning FSW and EFLAGS (eax).
;
; @param 1 The instruction (fcomi, fucomi - these set ZF/PF/CF directly).
;
; @returns EFLAGS in EAX.
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a uint16_t for the resulting FSW.
; @param A2 Pointer to the first 80-bit value.
; @param A3 Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A3]                      ; push second operand first...
        fld     tword [A2]                      ; ...so the A2 value is ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1      st1                             ; compare ST0 with ST1; result goes to EFLAGS

        fnstsw  word [A1]                       ; return FSW via A1
        pushf                                   ; fetch the EFLAGS the instruction just set...
        pop     xAX                             ; ...into xAX for the return value

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2915
2916
;;
; FPU instruction working on one 80-bit floating point value.
;
; @param 1 The instruction (implicit ST0 operand).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
; @param A2 Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1                                      ; ST0 = op(ST0)

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2953
2954
;;
; FPU instruction working on one 80-bit floating point value, only
; returning FSW (ftst/fxam classify ST0 into C0..C3).
;
; @param 1 The instruction
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a uint16_t for the resulting FSW.
; @param A2 Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1                                      ; examine/test ST0; sets C0..C3

        fnstsw  word [A1]                       ; return the FSW only

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2985
2986
2987
;;
; FPU instruction loading a 80-bit floating point constant.
;
; @param 1 The instruction (fld1, fldpi, fldz, ...; no operands).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW (rounding can affect the constant)
        %1                                      ; push the constant onto the stack

        fnstsw  word [A1 + IEMFPURESULT.FSW]    ; capture resulting FSW
        fnclex                                  ; clear exceptions so the store cannot trap
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
3022
3023
;;
; FPU instruction working on one 80-bit floating point value, outputting two.
;
; @param 1 The instruction (fptan, fxtract, fsincos - each pushes a second result).
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to a IEMFPURESULTTWO for the output.
; @param A2 Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h

        fninit                                  ; clean FPU state
        fld     tword [A2]                      ; ST0 = operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; install guest FCW
        %1                                      ; leaves two results: ST0 (pushed last) and ST1

        fnstsw  word [A1 + IEMFPURESULT TWO.FSW %substr 0,0] ; (placeholder - see below)
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2]
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
3058
3059
3060
3061
3062;---------------------- SSE and MMX Operations ----------------------
3063
;; @todo what do we need to do for MMX?
; Currently empty placeholders so the media workers below have a single spot
; to add host MMX state save/restore should it turn out to be necessary.
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro
%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;; @todo what do we need to do for SSE?
; Same placeholder arrangement for the SSE workers.
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro
%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro
3075
3076
;;
; Media instruction working on two full sized registers.
; Emits both a 64-bit MMX worker (_u64) and a 128-bit SSE worker (_u128).
;
; @param 1 The instruction
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to the first media register size operand (input/output).
; @param A2 Pointer to the second media register size operand (input).
;
%macro IEMIMPL_MEDIA_F2 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]                       ; mm0 = destination operand
        movq    mm1, [A2]                       ; mm1 = source operand
        %1      mm0, mm1
        movq    [A1], mm0                       ; write back the result

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]                      ; unaligned loads - operand pointers need not be 16-byte aligned
        movdqu  xmm1, [A2]
        %1      xmm0, xmm1
        movdqu  [A1], xmm0                      ; write back the result

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F2 pxor
IEMIMPL_MEDIA_F2 pcmpeqb
IEMIMPL_MEDIA_F2 pcmpeqw
IEMIMPL_MEDIA_F2 pcmpeqd
3118
3119
;;
; Media instruction working on one full sized and one half sized register (lower half).
; Only the low 32 bits (MMX) / 64 bits (SSE) of the second operand are loaded,
; which is exactly the part the punpckl* family reads.
;
; @param 1 The instruction
; @param 2 1 if MMX is included, 0 if not.
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to the first full sized media register operand (input/output).
; @param A2 Pointer to the second half sized media register operand (input).
;
%macro IEMIMPL_MEDIA_F1L1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]                       ; full destination operand
        movd    mm1, [A2]                       ; only the low 32 bits of the source are used
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]                      ; full destination operand
        movq    xmm1, [A2]                      ; only the low 64 bits of the source are used
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F1L1 punpcklbw, 1
IEMIMPL_MEDIA_F1L1 punpcklwd, 1
IEMIMPL_MEDIA_F1L1 punpckldq, 1
IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
3164
3165
;;
; Media instruction working on one full sized and one half sized register (high half).
; The second operand is loaded in full because these instructions read its
; UPPER half (unlike the F1L1 variant which loads only the low bits).
;
; @param 1 The instruction
; @param 2 1 if MMX is included, 0 if not.
;
; @param A0 FPU context (fxsave).
; @param A1 Pointer to the first full sized media register operand (input/output).
; @param A2 Pointer to the second full sized media register operand, where we
;           will only use the upper half (input).
;
%macro IEMIMPL_MEDIA_F1H1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]                       ; full destination operand
        movq    mm1, [A2]                       ; full source - the instruction reads its high half
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]                      ; full destination operand
        movdqu  xmm1, [A2]                      ; full source - the instruction reads its high half
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro
3206
; punpckhbw/wd/dq/qdq read the HIGH half of the second operand, so they must
; use the full-width-load variant IEMIMPL_MEDIA_F1H1 (defined above for
; exactly this purpose and otherwise unused).  The previously used
; IEMIMPL_MEDIA_F1L1 only loads the low 32/64 bits of [A2] (movd/movq),
; leaving the half these instructions actually consume as zero.
IEMIMPL_MEDIA_F1H1 punpckhbw, 1
IEMIMPL_MEDIA_F1H1 punpckhwd, 1
IEMIMPL_MEDIA_F1H1 punpckhdq, 1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
3211
3212
3213;
3214; Shufflers with evil 8-bit immediates.
3215;
3216
;;
; PSHUFW with a run-time immediate: the imm8 cannot be encoded dynamically, so
; we generate 256 'pshufw mm0, mm1, imm8; ret' stubs (5 bytes each) and call
; the one selected by A3.
;
; @param A0 FPU context (fxsave) - not used by the shuffle itself.
; @param A1 Pointer to the 64-bit destination operand (input/output).
; @param A2 Pointer to the 64-bit source operand (input).
; @param A3 The 8-bit immediate (0..255) selecting the shuffle.
;
BEGINPROC_FASTCALL iemAImpl_pshufw, 16
        PROLOGUE_4_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]
        movq    mm1, [A2]
        lea     T0, [A3 + A3*4]                 ; T0 = A3 * 5; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]             ; T1 = base of the stub table
        lea     T1, [T1 + T0]                   ; T1 = stub for immediate A3
        call    T1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_4_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                        ; 256*5 == 0x500
dw 0xfaff + (.immEnd - .imm0)                   ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)                  ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw
3242
3243
;;
; SSE shuffles (pshufhw/pshuflw/pshufd) with a run-time immediate, using the
; same stub-table dispatch as iemAImpl_pshufw above, but with 6-byte stubs.
;
; @param 1 The instruction.
;
; @param A0 FPU context (fxsave) - not used by the shuffle itself.
; @param A1 Pointer to the 128-bit destination operand (input/output).
; @param A2 Pointer to the 128-bit source operand (input).
; @param A3 The 8-bit immediate (0..255) selecting the shuffle.
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
        PROLOGUE_4_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        lea     T1, [.imm0 xWrtRIP]             ; T1 = base of the stub table
        lea     T0, [A3 + A3*2]                 ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
        lea     T1, [T1 + T0*2]                 ; T1 = stub for immediate A3
        call    T1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_4_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                        ; 256*6 == 0x600
dw 0xf9ff + (.immEnd - .imm0)                   ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)                  ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
3275
3276
3277;
3278; Move byte mask.
3279;
3280
;;
; PMOVMSKB on a 64-bit MMX source: collects the byte sign bits into a mask.
;
; @param A0 FPU context (fxsave) - not used here.
; @param A1 Pointer to the 64-bit destination (output).
; @param A2 Pointer to the 64-bit MMX source value (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A2]
        pmovmskb T0, mm1                        ; fully overwrites T0 - the old 'mov T0, [A1]' preload was dead
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0               ; T0 is only 32-bit on x86; zero the upper dword explicitly
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u64
3295
;;
; PMOVMSKB on a 128-bit SSE source: collects the byte sign bits into a mask.
;
; @param A0 FPU context (fxsave) - not used here.
; @param A1 Pointer to the 64-bit destination (output).
; @param A2 Pointer to the 128-bit source value (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A2]                      ; unaligned load - no alignment guarantee on A2
        pmovmskb T0, xmm1                       ; fully overwrites T0 - the old 'mov T0, [A1]' preload was dead
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0               ; T0 is only 32-bit on x86; zero the upper dword explicitly
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u128
3310
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette