VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 94410

Last change on this file since 94410 was 94410, checked in by vboxsync, 3 years ago

VMM/IEM: Correction to iemAImpl_fistt_r80_to_i16 return variable size. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 95.6 KB
Line 
1; $Id: IEMAllAImpl.asm 94410 2022-03-31 10:59:45Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;*********************************************************************************************************************************
20;* Header Files *
21;*********************************************************************************************************************************
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;*********************************************************************************************************************************
28;* Defined Constants And Macros *
29;*********************************************************************************************************************************
30
31;;
32; RET XX / RET wrapper for fastcall.
33; @param 1 Number of argument bytes for the callee to pop (used on x86 Windows fastcall only).
34%macro RET_FASTCALL 1
35%ifdef RT_ARCH_X86
36 %ifdef RT_OS_WINDOWS
37 ret %1 ; Windows x86 fastcall: the callee pops its stack arguments.
38 %else
39 ret ; Other x86 targets: plain near return, no callee stack cleanup.
40 %endif
41%else
42 ret ; AMD64: arguments are in registers, nothing to pop.
43%endif
44%endmacro
45
46;;
47; NAME for fastcall functions.
48;
49;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
50; escaping (or whatever the dollar is good for here). Thus the ugly
51; prefix argument.
52;
53%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name) ; default: plain C name, arg size and prefix ignored.
54%ifdef RT_ARCH_X86
55 %ifdef RT_OS_WINDOWS
56 %undef NAME_FASTCALL
57 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs ; Windows x86 fastcall decoration: @name@cbArgs.
58 %endif
59%endif
60
61;;
62; BEGINPROC for fastcall functions.
63;
64; @param 1 The function name (C).
65; @param 2 The argument size on x86.
66;
67%macro BEGINPROC_FASTCALL 2
68 %ifdef ASM_FORMAT_PE
69 export %1=NAME_FASTCALL(%1,%2,$@) ; PE: export under the decorated name ($@ works around the yasm '@' issue noted above).
70 %endif
71 %ifdef __NASM__
72 %ifdef ASM_FORMAT_OMF
73 export NAME(%1) NAME_FASTCALL(%1,%2,$@) ; OMF: export decorated alias of the C name.
74 %endif
75 %endif
76 %ifndef ASM_FORMAT_BIN
77 global NAME_FASTCALL(%1,%2,$@)
78 %endif
79NAME_FASTCALL(%1,%2,@): ; the label itself uses a plain '@' prefix (no dollar escaping needed here).
80%endmacro
81
82
83;
84; We employ some macro assembly here to hide the calling convention differences.
85; (A0..A3 name the argument registers, T0..T2 the scratch registers; the x86 prologues load stack arguments and preserve the callee-saved registers they borrow.)
86%ifdef RT_ARCH_AMD64
87 %macro PROLOGUE_1_ARGS 0
88 %endmacro
89 %macro EPILOGUE_1_ARGS 0
90 ret
91 %endmacro
92 %macro EPILOGUE_1_ARGS_EX 0
93 ret
94 %endmacro
95
96 %macro PROLOGUE_2_ARGS 0
97 %endmacro
98 %macro EPILOGUE_2_ARGS 0
99 ret
100 %endmacro
101 %macro EPILOGUE_2_ARGS_EX 1
102 ret
103 %endmacro
104
105 %macro PROLOGUE_3_ARGS 0
106 %endmacro
107 %macro EPILOGUE_3_ARGS 0
108 ret
109 %endmacro
110 %macro EPILOGUE_3_ARGS_EX 1
111 ret
112 %endmacro
113
114 %macro PROLOGUE_4_ARGS 0
115 %endmacro
116 %macro EPILOGUE_4_ARGS 0
117 ret
118 %endmacro
119 %macro EPILOGUE_4_ARGS_EX 1
120 ret
121 %endmacro
122
123 %ifdef ASM_CALL64_GCC
124 %define A0 rdi ; 1st argument
125 %define A0_32 edi
126 %define A0_16 di
127 %define A0_8 dil
128
129 %define A1 rsi ; 2nd argument
130 %define A1_32 esi
131 %define A1_16 si
132 %define A1_8 sil
133
134 %define A2 rdx ; 3rd argument
135 %define A2_32 edx
136 %define A2_16 dx
137 %define A2_8 dl
138
139 %define A3 rcx ; 4th argument (no A3_8 alias is defined)
140 %define A3_32 ecx
141 %define A3_16 cx
142 %endif
143
144 %ifdef ASM_CALL64_MSC
145 %define A0 rcx ; 1st argument
146 %define A0_32 ecx
147 %define A0_16 cx
148 %define A0_8 cl
149
150 %define A1 rdx ; 2nd argument
151 %define A1_32 edx
152 %define A1_16 dx
153 %define A1_8 dl
154
155 %define A2 r8 ; 3rd argument
156 %define A2_32 r8d
157 %define A2_16 r8w
158 %define A2_8 r8b
159
160 %define A3 r9 ; 4th argument (no A3_8 alias is defined)
161 %define A3_32 r9d
162 %define A3_16 r9w
163 %endif
164
165 %define T0 rax ; 1st scratch register (volatile in both 64-bit conventions)
166 %define T0_32 eax
167 %define T0_16 ax
168 %define T0_8 al
169
170 %define T1 r11 ; 2nd scratch register
171 %define T1_32 r11d
172 %define T1_16 r11w
173 %define T1_8 r11b
174
175 %define T2 r10 ; only AMD64
176 %define T2_32 r10d
177 %define T2_16 r10w
178 %define T2_8 r10b
179
180%else
181 ; x86
182 %macro PROLOGUE_1_ARGS 0
183 push edi ; preserve edi (T1)
184 %endmacro
185 %macro EPILOGUE_1_ARGS 0
186 pop edi
187 ret 0
188 %endmacro
189 %macro EPILOGUE_1_ARGS_EX 1
190 pop edi
191 ret %1
192 %endmacro
193
194 %macro PROLOGUE_2_ARGS 0
195 push edi ; preserve edi (T1)
196 %endmacro
197 %macro EPILOGUE_2_ARGS 0
198 pop edi
199 ret 0
200 %endmacro
201 %macro EPILOGUE_2_ARGS_EX 1
202 pop edi
203 ret %1
204 %endmacro
205
206 %macro PROLOGUE_3_ARGS 0
207 push ebx
208 mov ebx, [esp + 4 + 4] ; A2: 3rd argument from the stack (accounting for the ebx push and return address).
209 push edi
210 %endmacro
211 %macro EPILOGUE_3_ARGS_EX 1
212 %if (%1) < 4
213 %error "With three args, at least 4 bytes must be removed from the stack upon return (32-bit)."
214 %endif
215 pop edi
216 pop ebx
217 ret %1
218 %endmacro
219 %macro EPILOGUE_3_ARGS 0
220 EPILOGUE_3_ARGS_EX 4
221 %endmacro
222
223 %macro PROLOGUE_4_ARGS 0
224 push ebx
225 push edi
226 push esi
227 mov ebx, [esp + 12 + 4 + 0] ; A2: 3rd argument (after 3 pushes + return address).
228 mov esi, [esp + 12 + 4 + 4] ; A3: 4th argument.
229 %endmacro
230 %macro EPILOGUE_4_ARGS_EX 1
231 %if (%1) < 8
232 %error "With four args, at least 8 bytes must be removed from the stack upon return (32-bit)."
233 %endif
234 pop esi
235 pop edi
236 pop ebx
237 ret %1
238 %endmacro
239 %macro EPILOGUE_4_ARGS 0
240 EPILOGUE_4_ARGS_EX 8
241 %endmacro
242
243 %define A0 ecx ; 1st argument (x86 fastcall)
244 %define A0_32 ecx
245 %define A0_16 cx
246 %define A0_8 cl
247
248 %define A1 edx ; 2nd argument
249 %define A1_32 edx
250 %define A1_16 dx
251 %define A1_8 dl
252
253 %define A2 ebx ; 3rd argument, loaded from the stack by PROLOGUE_3/4_ARGS
254 %define A2_32 ebx
255 %define A2_16 bx
256 %define A2_8 bl
257
258 %define A3 esi ; 4th argument, loaded from the stack by PROLOGUE_4_ARGS (esi has no 8-bit form, so no A3_8)
259 %define A3_32 esi
260 %define A3_16 si
261
262 %define T0 eax
263 %define T0_32 eax
264 %define T0_16 ax
265 %define T0_8 al
266
267 %define T1 edi ; callee-saved on x86, hence the push/pop in the prologues/epilogues
268 %define T1_32 edi
269 %define T1_16 di
270%endif
271
272
273;;
274; Load the relevant flags from [%1] if there are undefined flags (%3).
275;
276; @remarks Clobbers T0, stack. Changes EFLAGS.
277; (Note: the ';%if (%3) != 0' guard below is commented out, so the load is currently always performed.)
278; @param 1 The parameter (A0..A3) pointing to the eflags.
279; @param 2 The set of modified flags.
280; @param 3 The set of undefined flags.
281;
282%macro IEM_MAYBE_LOAD_FLAGS 3
283 ;%if (%3) != 0
284 pushf ; store current flags
285 mov T0_32, [%1] ; load the guest flags
286 and dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
287 and T0_32, (%2 | %3) ; select the modified and undefined flags.
288 or [xSP], T0 ; merge guest flags with host flags.
289 popf ; load the mixed flags.
290 ;%endif
291%endmacro
292
293;;
294; Update the guest EFLAGS at [%1] from the host EFLAGS for the given flag masks.
295; Expands to nothing when both masks are zero.
296; @remarks Clobbers T0, T1, stack.
297; @param 1 The register pointing to the EFLAGS.
298; @param 2 The mask of modified flags to save.
299; @param 3 The mask of undefined flags to (maybe) save.
300;
301%macro IEM_SAVE_FLAGS 3
302 %if (%2 | %3) != 0
303 pushf
304 pop T1 ; T1 = host EFLAGS after the emulated instruction.
305 mov T0_32, [%1] ; flags
306 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags.
307 and T1_32, (%2 | %3) ; select the modified and undefined flags.
308 or T0_32, T1_32 ; combine the flags.
309 mov [%1], T0_32 ; save the flags.
310 %endif
311%endmacro
312
313;;
314; Calculates the new EFLAGS based on the CPU EFLAGS and fixed clear and set bit masks.
315; Expands to nothing when all three masks are zero.
316; @remarks Clobbers T0, T1, stack.
317; @param 1 The register pointing to the EFLAGS.
318; @param 2 The mask of modified flags to save.
319; @param 3 Mask of additional flags to always clear.
320; @param 4 Mask of additional flags to always set.
321;
322%macro IEM_SAVE_AND_ADJUST_FLAGS 4
323 %if (%2 | %3 | %4) != 0
324 pushf
325 pop T1 ; T1 = host EFLAGS.
326 mov T0_32, [%1] ; load flags.
327 and T0_32, ~(%2 | %3) ; clear the modified and always cleared flags.
328 and T1_32, (%2) ; select the modified flags.
329 or T0_32, T1_32 ; combine the flags.
330 %if (%4) != 0
331 or T0_32, %4 ; add the always set flags.
332 %endif
333 mov [%1], T0_32 ; save the result.
334 %endif
335%endmacro
336
337;;
338; Calculates the new EFLAGS based on the CPU EFLAGS (%2), a clear mask (%3),
339; signed input (%4[%5]) and parity index (%6).
340;
341; This is used by MUL and IMUL, where we got result (%4 & %6) in xAX which is
342; also T0. So, we have to use T1 for the EFLAGS calculation and save T0/xAX
343; while we extract the %2 flags from the CPU EFLAGS or use T2 (only AMD64).
344;
345; @remarks Clobbers T1, stack, %6, EFLAGS; also T2 on AMD64 (on x86, T0 is saved and restored via the stack).
346; @param 1 The register pointing to the EFLAGS.
347; @param 2 The mask of modified flags to save.
348; @param 3 Mask of additional flags to always clear
349; @param 4 The result register to set SF by.
350; @param 5 The width of the %4 register in bits (8, 16, 32, or 64).
351; @param 6 The (full) register containing the parity table index. Will be modified!
352
353%macro IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF 6
354 %ifdef RT_ARCH_AMD64
355 pushf
356 pop T2 ; T2 = host EFLAGS (T0/xAX holds the result, keep it intact).
357 %else
358 push T0 ; x86 has no T2: spill T0 so it can hold the host EFLAGS temporarily.
359 pushf
360 pop T0
361 %endif
362 mov T1_32, [%1] ; load flags.
363 and T1_32, ~(%2 | %3 | X86_EFL_PF | X86_EFL_SF) ; clear the modified, always cleared flags and the two flags we calc.
364 %ifdef RT_ARCH_AMD64
365 and T2_32, (%2) ; select the modified flags.
366 or T1_32, T2_32 ; combine the flags.
367 %else
368 and T0_32, (%2) ; select the modified flags.
369 or T1_32, T0_32 ; combine the flags.
370 pop T0 ; restore the spilled T0/xAX result.
371 %endif
372
373 ; First calculate SF as it's likely to be referring to the same register as %6 does.
374 bt %4, %5 - 1 ; CF = sign bit of the result.
375 jnc %%sf_clear
376 or T1_32, X86_EFL_SF
377 %%sf_clear:
378
379 ; Parity last.
380 and %6, 0xff ; parity is computed from the low result byte only.
381 %ifdef RT_ARCH_AMD64
382 lea T2, [NAME(g_afParity) xWrtRIP]
383 or T1_8, [T2 + %6]
384 %else
385 or T1_8, [NAME(g_afParity) + %6]
386 %endif
387
388 mov [%1], T1_32 ; save the result.
389%endmacro
390
391;;
392; Calculates the new EFLAGS using fixed clear and set bit masks.
393; Expands to nothing when both masks are zero.
394; @remarks Clobbers T0.
395; @param 1 The register pointing to the EFLAGS.
396; @param 2 Mask of additional flags to always clear.
397; @param 3 Mask of additional flags to always set.
398;
399%macro IEM_ADJUST_FLAGS 3
400 %if (%2 | %3) != 0
401 mov T0_32, [%1] ; Load flags.
402 %if (%2) != 0
403 and T0_32, ~(%2) ; Remove the always cleared flags.
404 %endif
405 %if (%3) != 0
406 or T0_32, %3 ; Add the always set flags.
407 %endif
408 mov [%1], T0_32 ; Save the result.
409 %endif
410%endmacro
411
412;;
413; Calculates the new EFLAGS using fixed clear and set bit masks, and
414; additionally computes PF from the table index in %4.
415; @remarks Clobbers T0, %4, EFLAGS; also T2 on AMD64 (used for the RIP-relative table address).
416; @param 1 The register pointing to the EFLAGS.
417; @param 2 Mask of additional flags to always clear.
418; @param 3 Mask of additional flags to always set.
419; @param 4 The (full) register containing the parity table index. Will be modified!
420;
421%macro IEM_ADJUST_FLAGS_WITH_PARITY 4
422 mov T0_32, [%1] ; Load flags.
423 and T0_32, ~(%2 | X86_EFL_PF) ; Remove PF and the always cleared flags.
424 %if (%3) != 0
425 or T0_32, %3 ; Add the always set flags.
426 %endif
427 and %4, 0xff ; Only the low byte indexes the parity table.
428 %ifdef RT_ARCH_AMD64
429 lea T2, [NAME(g_afParity) xWrtRIP]
430 or T0_8, [T2 + %4]
431 %else
432 or T0_8, [NAME(g_afParity) + %4]
433 %endif
434 mov [%1], T0_32 ; Save the result.
435%endmacro
436
437
438;*********************************************************************************************************************************
439;* External Symbols *
440;*********************************************************************************************************************************
441extern NAME(g_afParity)
442
443
444;;
445; Macro for implementing a binary operator.
446;
447; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
448; variants, except on 32-bit systems where the 64-bit accesses require hand
449; coding.
450;
451; All the functions take a pointer to the destination memory operand in A0,
452; the source register operand in A1 and a pointer to eflags in A2.
453;
454; @param 1 The instruction mnemonic.
455; @param 2 Non-zero if there should be a locked version.
456; @param 3 The modified flags.
457; @param 4 The undefined flags.
458;
459%macro IEMIMPL_BIN_OP 4
460BEGINCODE
461BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
462 PROLOGUE_3_ARGS
463 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
464 %1 byte [A0], A1_8 ; perform the actual operation on the guest operands.
465 IEM_SAVE_FLAGS A2, %3, %4
466 EPILOGUE_3_ARGS
467ENDPROC iemAImpl_ %+ %1 %+ _u8
468
469BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
470 PROLOGUE_3_ARGS
471 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
472 %1 word [A0], A1_16
473 IEM_SAVE_FLAGS A2, %3, %4
474 EPILOGUE_3_ARGS
475ENDPROC iemAImpl_ %+ %1 %+ _u16
476
477BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
478 PROLOGUE_3_ARGS
479 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
480 %1 dword [A0], A1_32
481 IEM_SAVE_FLAGS A2, %3, %4
482 EPILOGUE_3_ARGS
483ENDPROC iemAImpl_ %+ %1 %+ _u32
484
485 %ifdef RT_ARCH_AMD64
486BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
487 PROLOGUE_3_ARGS
488 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
489 %1 qword [A0], A1
490 IEM_SAVE_FLAGS A2, %3, %4
491 EPILOGUE_3_ARGS_EX 8
492ENDPROC iemAImpl_ %+ %1 %+ _u64
493 %endif ; RT_ARCH_AMD64
494
495 %if %2 != 0 ; locked versions requested?
496
497BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
498 PROLOGUE_3_ARGS
499 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
500 lock %1 byte [A0], A1_8
501 IEM_SAVE_FLAGS A2, %3, %4
502 EPILOGUE_3_ARGS
503ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
504
505BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
506 PROLOGUE_3_ARGS
507 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
508 lock %1 word [A0], A1_16
509 IEM_SAVE_FLAGS A2, %3, %4
510 EPILOGUE_3_ARGS
511ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
512
513BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
514 PROLOGUE_3_ARGS
515 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
516 lock %1 dword [A0], A1_32
517 IEM_SAVE_FLAGS A2, %3, %4
518 EPILOGUE_3_ARGS
519ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
520
521 %ifdef RT_ARCH_AMD64
522BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
523 PROLOGUE_3_ARGS
524 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
525 lock %1 qword [A0], A1
526 IEM_SAVE_FLAGS A2, %3, %4
527 EPILOGUE_3_ARGS_EX 8
528ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
529 %endif ; RT_ARCH_AMD64
530 %endif ; locked
531%endmacro
532
533; instr, lock, modified-flags, undefined-flags.
534IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
535IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
536IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
537IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
538IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
539IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
540IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
541IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 ; no locked variant: cmp does not write its destination.
542IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF ; no locked variant: test does not write its destination.
543
544
545;;
546; Macro for implementing a bit operator.
547;
548; This will generate code for the 16, 32 and 64 bit accesses with locked
549; variants, except on 32-bit systems where the 64-bit accesses require hand
550; coding.
551;
552; All the functions take a pointer to the destination memory operand in A0,
553; the source register operand in A1 and a pointer to eflags in A2.
554;
555; @param 1 The instruction mnemonic.
556; @param 2 Non-zero if there should be a locked version.
557; @param 3 The modified flags.
558; @param 4 The undefined flags.
559;
560%macro IEMIMPL_BIT_OP 4
561BEGINCODE
562BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
563 PROLOGUE_3_ARGS
564 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
565 %1 word [A0], A1_16
566 IEM_SAVE_FLAGS A2, %3, %4
567 EPILOGUE_3_ARGS
568ENDPROC iemAImpl_ %+ %1 %+ _u16
569
570BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
571 PROLOGUE_3_ARGS
572 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
573 %1 dword [A0], A1_32
574 IEM_SAVE_FLAGS A2, %3, %4
575 EPILOGUE_3_ARGS
576ENDPROC iemAImpl_ %+ %1 %+ _u32
577
578 %ifdef RT_ARCH_AMD64
579BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
580 PROLOGUE_3_ARGS
581 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
582 %1 qword [A0], A1
583 IEM_SAVE_FLAGS A2, %3, %4
584 EPILOGUE_3_ARGS_EX 8
585ENDPROC iemAImpl_ %+ %1 %+ _u64
586 %endif ; RT_ARCH_AMD64
587
588 %if %2 != 0 ; locked versions requested?
589
590BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
591 PROLOGUE_3_ARGS
592 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
593 lock %1 word [A0], A1_16
594 IEM_SAVE_FLAGS A2, %3, %4
595 EPILOGUE_3_ARGS
596ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
597
598BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
599 PROLOGUE_3_ARGS
600 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
601 lock %1 dword [A0], A1_32
602 IEM_SAVE_FLAGS A2, %3, %4
603 EPILOGUE_3_ARGS
604ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
605
606 %ifdef RT_ARCH_AMD64
607BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
608 PROLOGUE_3_ARGS
609 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
610 lock %1 qword [A0], A1
611 IEM_SAVE_FLAGS A2, %3, %4
612 EPILOGUE_3_ARGS_EX 8
613ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
614 %endif ; RT_ARCH_AMD64
615 %endif ; locked
616%endmacro
617IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) ; bt only reads the destination, hence no locked variant.
618IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
619IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
620IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
621
622;;
623; Macro for implementing a bit search operator.
624;
625; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
626; systems where the 64-bit accesses require hand coding.
627;
628; All the functions take a pointer to the destination memory operand in A0,
629; the source register operand in A1 and a pointer to eflags in A2.
630;
631; In the ZF case the destination register is 'undefined', however it seems that
632; both AMD and Intel just leaves it as is. The undefined EFLAGS differs between
633; AMD and Intel and according to https://www.sandpile.org/x86/flags.htm between
634; Intel microarchitectures. We only implement 'intel' and 'amd' variation with
635; the behaviour of more recent CPUs (Intel 10980X and AMD 3990X).
636;
637; @param 1 The instruction mnemonic.
638; @param 2 The modified flags.
639; @param 3 The undefined flags.
640;
641%macro IEMIMPL_BIT_OP 3
642BEGINCODE
643BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
644 PROLOGUE_3_ARGS
645 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
646 %1 T0_16, A1_16
647 jz .unchanged_dst ; ZF set: no bit found, leave the destination untouched.
648 mov [A0], T0_16
649.unchanged_dst:
650 IEM_SAVE_FLAGS A2, %2, %3
651 EPILOGUE_3_ARGS
652ENDPROC iemAImpl_ %+ %1 %+ _u16
653
654BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _intel, 12
655 PROLOGUE_3_ARGS
656 %1 T1_16, A1_16
657 jz .unchanged_dst
658 mov [A0], T1_16
659 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
660 EPILOGUE_3_ARGS
661.unchanged_dst:
662 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
663 EPILOGUE_3_ARGS
664ENDPROC iemAImpl_ %+ %1 %+ _u16_intel
665
666BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _amd, 12
667 PROLOGUE_3_ARGS
668 %1 T0_16, A1_16
669 jz .unchanged_dst
670 mov [A0], T0_16
671.unchanged_dst:
672 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.
673 EPILOGUE_3_ARGS
674ENDPROC iemAImpl_ %+ %1 %+ _u16_amd
675
676
677BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
678 PROLOGUE_3_ARGS
679 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
680 %1 T0_32, A1_32
681 jz .unchanged_dst
682 mov [A0], T0_32
683.unchanged_dst:
684 IEM_SAVE_FLAGS A2, %2, %3
685 EPILOGUE_3_ARGS
686ENDPROC iemAImpl_ %+ %1 %+ _u32
687
688BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _intel, 12
689 PROLOGUE_3_ARGS
690 %1 T1_32, A1_32
691 jz .unchanged_dst
692 mov [A0], T1_32
693 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
694 EPILOGUE_3_ARGS
695.unchanged_dst:
696 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
697 EPILOGUE_3_ARGS
698ENDPROC iemAImpl_ %+ %1 %+ _u32_intel
699
700BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _amd, 12
701 PROLOGUE_3_ARGS
702 %1 T0_32, A1_32
703 jz .unchanged_dst
704 mov [A0], T0_32
705.unchanged_dst:
706 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.
707 EPILOGUE_3_ARGS
708ENDPROC iemAImpl_ %+ %1 %+ _u32_amd
709
710
711 %ifdef RT_ARCH_AMD64
712
713BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
714 PROLOGUE_3_ARGS
715 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
716 %1 T0, A1
717 jz .unchanged_dst
718 mov [A0], T0
719.unchanged_dst:
720 IEM_SAVE_FLAGS A2, %2, %3
721 EPILOGUE_3_ARGS_EX 8
722ENDPROC iemAImpl_ %+ %1 %+ _u64
723
724BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _intel, 16
725 PROLOGUE_3_ARGS
726 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 ; NOTE(review): the 16/32-bit _intel variants skip this flag load - confirm the asymmetry is intentional.
727 %1 T1, A1
728 jz .unchanged_dst
729 mov [A0], T1
730 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
731 EPILOGUE_3_ARGS
732.unchanged_dst:
733 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
734 EPILOGUE_3_ARGS
735ENDPROC iemAImpl_ %+ %1 %+ _u64_intel
736
737BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _amd, 16
738 PROLOGUE_3_ARGS
739 %1 T0, A1
740 jz .unchanged_dst
741 mov [A0], T0
742.unchanged_dst:
743 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.
744 EPILOGUE_3_ARGS_EX 8
745ENDPROC iemAImpl_ %+ %1 %+ _u64_amd
746
747 %endif ; RT_ARCH_AMD64
748%endmacro
749
750IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
751IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
752
753
754;
755; IMUL is also a similar but yet different case (no lock, no mem dst).
756; The rDX:rAX variant of imul is handled together with mul further down.
757;
758BEGINCODE
759; @param 1 EFLAGS that are modified.
760; @param 2 Undefined EFLAGS.
761; @param 3 Function suffix.
762; @param 4 EFLAGS variation: 0 for native, 1 for intel (ignored),
763; 2 for AMD (set AF, clear PF, ZF and SF). NOTE(review): the code below only special-cases %4 == 1 (the intel variant) via IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF - confirm this description still matches.
764%macro IEMIMPL_IMUL_TWO 4
765BEGINPROC_FASTCALL iemAImpl_imul_two_u16 %+ %3, 12
766 PROLOGUE_3_ARGS
767 IEM_MAYBE_LOAD_FLAGS A2, %1, %2
768 imul A1_16, word [A0]
769 mov [A0], A1_16 ; write the product back to the destination.
770 %if %4 != 1
771 IEM_SAVE_FLAGS A2, %1, %2
772 %else
773 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_16, 16, A1
774 %endif
775 EPILOGUE_3_ARGS
776ENDPROC iemAImpl_imul_two_u16 %+ %3
777
778BEGINPROC_FASTCALL iemAImpl_imul_two_u32 %+ %3, 12
779 PROLOGUE_3_ARGS
780 IEM_MAYBE_LOAD_FLAGS A2, %1, %2
781 imul A1_32, dword [A0]
782 mov [A0], A1_32
783 %if %4 != 1
784 IEM_SAVE_FLAGS A2, %1, %2
785 %else
786 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_32, 32, A1
787 %endif
788 EPILOGUE_3_ARGS
789ENDPROC iemAImpl_imul_two_u32 %+ %3
790
791 %ifdef RT_ARCH_AMD64
792BEGINPROC_FASTCALL iemAImpl_imul_two_u64 %+ %3, 16
793 PROLOGUE_3_ARGS
794 IEM_MAYBE_LOAD_FLAGS A2, %1, %2
795 imul A1, qword [A0]
796 mov [A0], A1
797 %if %4 != 1
798 IEM_SAVE_FLAGS A2, %1, %2
799 %else
800 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1, 64, A1
801 %endif
802 EPILOGUE_3_ARGS_EX 8
803ENDPROC iemAImpl_imul_two_u64 %+ %3
804 %endif ; RT_ARCH_AMD64
805%endmacro
806IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, , 0
807IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _intel, 1
808IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _amd, 2
809
810
811;
812; XCHG for memory operands. This implies locking. No flag changes.
813;
814; Each function takes two arguments, first the pointer to the memory,
815; then the pointer to the register. They all return void.
816;
817BEGINCODE
818BEGINPROC_FASTCALL iemAImpl_xchg_u8_locked, 8
819 PROLOGUE_2_ARGS
820 mov T0_8, [A1]
821 xchg [A0], T0_8 ; xchg with a memory operand carries an implicit lock prefix.
822 mov [A1], T0_8 ; return the previous memory value through the register pointer.
823 EPILOGUE_2_ARGS
824ENDPROC iemAImpl_xchg_u8_locked
825
826BEGINPROC_FASTCALL iemAImpl_xchg_u16_locked, 8
827 PROLOGUE_2_ARGS
828 mov T0_16, [A1]
829 xchg [A0], T0_16
830 mov [A1], T0_16
831 EPILOGUE_2_ARGS
832ENDPROC iemAImpl_xchg_u16_locked
833
834BEGINPROC_FASTCALL iemAImpl_xchg_u32_locked, 8
835 PROLOGUE_2_ARGS
836 mov T0_32, [A1]
837 xchg [A0], T0_32
838 mov [A1], T0_32
839 EPILOGUE_2_ARGS
840ENDPROC iemAImpl_xchg_u32_locked
841
842%ifdef RT_ARCH_AMD64
843BEGINPROC_FASTCALL iemAImpl_xchg_u64_locked, 8
844 PROLOGUE_2_ARGS
845 mov T0, [A1]
846 xchg [A0], T0
847 mov [A1], T0
848 EPILOGUE_2_ARGS
849ENDPROC iemAImpl_xchg_u64_locked
850%endif
851
852; Unlocked variants for fDisregardLock mode. These deliberately use a plain,
852; non-atomic load/load/store/store sequence instead of the implicitly locked xchg.
853
854BEGINPROC_FASTCALL iemAImpl_xchg_u8_unlocked, 8
855 PROLOGUE_2_ARGS
856 mov T0_8, [A1]
857 mov T1_8, [A0]
858 mov [A0], T0_8
859 mov [A1], T1_8
860 EPILOGUE_2_ARGS
861ENDPROC iemAImpl_xchg_u8_unlocked
862
863BEGINPROC_FASTCALL iemAImpl_xchg_u16_unlocked, 8
864 PROLOGUE_2_ARGS
865 mov T0_16, [A1]
866 mov T1_16, [A0]
867 mov [A0], T0_16
868 mov [A1], T1_16
869 EPILOGUE_2_ARGS
870ENDPROC iemAImpl_xchg_u16_unlocked
871
872BEGINPROC_FASTCALL iemAImpl_xchg_u32_unlocked, 8
873 PROLOGUE_2_ARGS
874 mov T0_32, [A1]
875 mov T1_32, [A0]
876 mov [A0], T0_32
877 mov [A1], T1_32
878 EPILOGUE_2_ARGS
879ENDPROC iemAImpl_xchg_u32_unlocked
880
881%ifdef RT_ARCH_AMD64
882BEGINPROC_FASTCALL iemAImpl_xchg_u64_unlocked, 8
883 PROLOGUE_2_ARGS
884 mov T0, [A1]
885 mov T1, [A0]
886 mov [A0], T0
887 mov [A1], T1
888 EPILOGUE_2_ARGS
889ENDPROC iemAImpl_xchg_u64_unlocked
890%endif
891
892
893;
894; XADD for memory operands.
895;
896; Each function takes three arguments, first the pointer to the
897; memory/register, then the pointer to the register, and finally a pointer to
898; eflags. They all return void.
899;
900BEGINCODE
901BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
902 PROLOGUE_3_ARGS
903 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
904 mov T0_8, [A1]
905 xadd [A0], T0_8 ; [A0] += T0, T0 = old [A0].
906 mov [A1], T0_8 ; return the old destination value via the register pointer.
907 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
908 EPILOGUE_3_ARGS
909ENDPROC iemAImpl_xadd_u8
910
911BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
912 PROLOGUE_3_ARGS
913 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
914 mov T0_16, [A1]
915 xadd [A0], T0_16
916 mov [A1], T0_16
917 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
918 EPILOGUE_3_ARGS
919ENDPROC iemAImpl_xadd_u16
920
921BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
922 PROLOGUE_3_ARGS
923 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
924 mov T0_32, [A1]
925 xadd [A0], T0_32
926 mov [A1], T0_32
927 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
928 EPILOGUE_3_ARGS
929ENDPROC iemAImpl_xadd_u32
930
931%ifdef RT_ARCH_AMD64
932BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
933 PROLOGUE_3_ARGS
934 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
935 mov T0, [A1]
936 xadd [A0], T0
937 mov [A1], T0
938 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
939 EPILOGUE_3_ARGS
940ENDPROC iemAImpl_xadd_u64
941%endif ; RT_ARCH_AMD64
942
943BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
944 PROLOGUE_3_ARGS
945 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
946 mov T0_8, [A1]
947 lock xadd [A0], T0_8 ; explicit lock prefix for the atomic variant.
948 mov [A1], T0_8
949 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
950 EPILOGUE_3_ARGS
951ENDPROC iemAImpl_xadd_u8_locked
952
953BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
954 PROLOGUE_3_ARGS
955 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
956 mov T0_16, [A1]
957 lock xadd [A0], T0_16
958 mov [A1], T0_16
959 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
960 EPILOGUE_3_ARGS
961ENDPROC iemAImpl_xadd_u16_locked
962
963BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
964 PROLOGUE_3_ARGS
965 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
966 mov T0_32, [A1]
967 lock xadd [A0], T0_32
968 mov [A1], T0_32
969 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
970 EPILOGUE_3_ARGS
971ENDPROC iemAImpl_xadd_u32_locked
972
973%ifdef RT_ARCH_AMD64
974BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
975 PROLOGUE_3_ARGS
976 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
977 mov T0, [A1]
978 lock xadd [A0], T0
979 mov [A1], T0
980 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
981 EPILOGUE_3_ARGS
982ENDPROC iemAImpl_xadd_u64_locked
983%endif ; RT_ARCH_AMD64
984
985
986;
987; CMPXCHG8B.
988;
989; These are tricky register wise, so the code is duplicated for each calling
990; convention.
991;
992; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
993;
994; C-proto:
995; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
996; uint32_t *pEFlags));
997;
998; Note! Identical to iemAImpl_cmpxchg16b.
999;
1000BEGINCODE
1001BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
1002%ifdef RT_ARCH_AMD64
1003 %ifdef ASM_CALL64_MSC
1004 push rbx ; rbx is callee-saved in the MS x64 ABI.
1005
1006 mov r11, rdx ; pu64EaxEdx (is also T1)
1007 mov r10, rcx ; pu64Dst
1008
1009 mov ebx, [r8] ; load comparand/replacement ebx:ecx from pu64EbxEcx.
1010 mov ecx, [r8 + 4]
1011 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1012 mov eax, [r11] ; load expected edx:eax from pu64EaxEdx.
1013 mov edx, [r11 + 4]
1014
1015 lock cmpxchg8b [r10]
1016
1017 mov [r11], eax ; write back edx:eax (old value on mismatch, unchanged on match).
1018 mov [r11 + 4], edx
1019 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1020
1021 pop rbx
1022 ret
1023 %else
1024 push rbx ; rbx is callee-saved in the SysV ABI too.
1025
1026 mov r10, rcx ; pEFlags
1027 mov r11, rdx ; pu64EbxEcx (is also T1)
1028
1029 mov ebx, [r11]
1030 mov ecx, [r11 + 4]
1031 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1032 mov eax, [rsi]
1033 mov edx, [rsi + 4]
1034
1035 lock cmpxchg8b [rdi]
1036
1037 mov [rsi], eax
1038 mov [rsi + 4], edx
1039 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1040
1041 pop rbx
1042 ret
1043
1044 %endif
1045%else
1046 push esi ; preserve the x86 callee-saved registers we use.
1047 push edi
1048 push ebx
1049 push ebp
1050
1051 mov edi, ecx ; pu64Dst
1052 mov esi, edx ; pu64EaxEdx
1053 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
1054 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
1055
1056 mov ebx, [ecx]
1057 mov ecx, [ecx + 4]
1058 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1059 mov eax, [esi]
1060 mov edx, [esi + 4]
1061
1062 lock cmpxchg8b [edi]
1063
1064 mov [esi], eax
1065 mov [esi + 4], edx
1066 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
1067
1068 pop ebp
1069 pop ebx
1070 pop edi
1071 pop esi
1072 ret 8 ; pop the two stack arguments (fastcall callee cleanup).
1073%endif
1074ENDPROC iemAImpl_cmpxchg8b
1075
1076BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
1077 ; Lazy bird always lock prefixes cmpxchg8b.
1078 jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
1079ENDPROC iemAImpl_cmpxchg8b_locked
1080
1081%ifdef RT_ARCH_AMD64
1082
1083;
1084; CMPXCHG16B.
1085;
1086; These are tricky register wise, so the code is duplicated for each calling
1087; convention.
1088;
1089; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
1090;
1091; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1093; uint32_t *pEFlags));
1094;
1095; Note! Identical to iemAImpl_cmpxchg8b.
1096;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
 %ifdef ASM_CALL64_MSC
        push    rbx                     ; rbx is callee-saved and holds the exchange value below.

        mov     r11, rdx                ; pu64RaxRdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     rbx, [r8]               ; rbx:rcx = exchange value (*pu128RbxRcx).
        mov     rcx, [r8 + 8]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [r11]              ; rdx:rax = expected value; loaded after the flags since T0 == eax.
        mov     rdx, [r11 + 8]

        lock cmpxchg16b [r10]

        mov     [r11], rax              ; Return the current/updated rdx:rax value to the caller.
        mov     [r11 + 8], rdx
        IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        push    rbx                     ; rbx is callee-saved and holds the exchange value below.

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64RbxRcx (is also T1)

        mov     rbx, [r11]              ; rbx:rcx = exchange value (*pu128RbxRcx).
        mov     rcx, [r11 + 8]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [rsi]              ; rdx:rax = expected value (*pu128RaxRdx).
        mov     rdx, [rsi + 8]

        lock cmpxchg16b [rdi]

        mov     [rsi], rax              ; Return the current/updated rdx:rax value to the caller.
        mov     [rsi + 8], rdx
        IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
ENDPROC iemAImpl_cmpxchg16b
1142
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg16b, so the plain variant
        ; above already behaves as locked; just tail-jump to it.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
ENDPROC iemAImpl_cmpxchg16b_locked
1147
1148%endif ; RT_ARCH_AMD64
1149
1150
1151;
1152; CMPXCHG.
1153;
1154; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
1155;
1156; C-proto:
1157; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t puEax, uintX_t uReg, uint32_t *pEFlags));
1158;
1159BEGINCODE
;
; @param 1      Lock prefix to emit ('lock' or blank).
; @param 2      Function name suffix ('_locked' or blank).
;
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]                ; al = expected/accumulator value.
        %1      cmpxchg [A0], A2_8
        mov     [A1], al                ; Return the current/updated accumulator value.
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]                ; ax = expected/accumulator value.
        %1      cmpxchg [A0], A2_16
        mov     [A1], ax                ; Return the current/updated accumulator value.
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]               ; eax = expected/accumulator value.
        %1      cmpxchg [A0], A2_32
        mov     [A1], eax               ; Return the current/updated accumulator value.
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]               ; rax = expected/accumulator value.
        %1      cmpxchg [A0], A2
        mov     [A1], rax               ; Return the current/updated accumulator value.
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]              ; ebx:ecx = the register operand (exchange value).
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]              ; edx:eax = expected/accumulator value.
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        jz      .cmpxchg8b_not_equal
        cmp     eax, eax                ; just set the other flags.
.store:
        mov     [esi], eax              ; Return the current/updated edx:eax value.
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        ; Recreate a 64-bit compare: flags from the high dwords if they
        ; differ, otherwise from the low dwords.
        cmp     [esi + 4], edx          ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
1248
1249;;
1250; Macro for implementing a unary operator.
1251;
1252; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
1253; variants, except on 32-bit system where the 64-bit accesses requires hand
1254; coding.
1255;
1256; All the functions takes a pointer to the destination memory operand in A0,
1257; the source register operand in A1 and a pointer to eflags in A2.
1258;
1259; @param 1 The instruction mnemonic.
1260; @param 2 The modified flags.
1261; @param 3 The undefined flags.
1262;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]               ; Apply the unary operator directly to memory.
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]               ; Locked variant of the above.
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1337
1338
1339;
1340; BSWAP. No flag changes.
1341;
1342; Each function takes one argument, pointer to the value to bswap
1343; (input/output). They all return void.
1344;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; just in case any of the upper bits are used.
        db 66h                          ; Hand-emit the operand-size prefix to get the 16-bit
        bswap   T0_32                   ; BSWAP encoding (architecturally undefined result) the
                                        ; guest executed, rather than a 32-bit BSWAP.
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1353
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; Load, byte-swap and store back in place.
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1361
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
%ifdef RT_ARCH_AMD64
        PROLOGUE_1_ARGS
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
        EPILOGUE_1_ARGS
%else
        ; 32-bit host: byte-swap each half and swap the halves.
        PROLOGUE_1_ARGS
        mov     T0, [A0]                ; T0 = low dword
        mov     T1, [A0 + 4]            ; T1 = high dword
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; swapped low half goes high, ...
        mov     [A0], T1                ; ... swapped high half goes low.
        EPILOGUE_1_ARGS
%endif
ENDPROC iemAImpl_bswap_u64
1380
1381
1382;;
1383; Macro for implementing a shift operation.
1384;
1385; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1386; 32-bit system where the 64-bit accesses requires hand coding.
1387;
1388; All the functions takes a pointer to the destination memory operand in A0,
1389; the shift count in A1 and a pointer to eflags in A2.
1390;
1391; @param 1 The instruction mnemonic.
1392; @param 2 The modified flags.
1393; @param 3 The undefined flags.
1394;
1395; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1396;
1397; @note the _intel and _amd variants are implemented in C.
1398;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8                ; The shift count must be in cl.
        %1      byte [A0], cl
 %else
        xchg    A1, A0                  ; A1 is (e/r)cx here; free it up for the count by
        %1      byte [A1], cl           ; swapping the pointer into A1.
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1468
1469
1470;;
1471; Macro for implementing a double precision shift operation.
1472;
1473; This will generate code for the 16, 32 and 64 bit accesses, except on
1474; 32-bit system where the 64-bit accesses requires hand coding.
1475;
1476; The functions takes the destination operand (r/m) in A0, the source (reg) in
1477; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1478;
1479; @param 1 The instruction mnemonic.
1480; @param 2 The modified flags.
1481; @param 3 The undefined flags.
1482;
1483; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1484;
1485; @note the _intel and _amd variants are implemented in C.
1486;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; A3 is rcx; swap count into cl, and swap back after
        %1      [A0], A1_16, cl         ; the shift so IEM_SAVE_FLAGS sees pEFlags in A3 again.
        xchg    A3, A2
 %else
        xchg    A0, A2                  ; A0 is (e/r)cx; move the count into cl and keep the
        %1      [A2], A1_16, cl         ; destination pointer in A2 (A3/pEFlags is untouched).
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
  %else
        xchg    A0, A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1540
1541
1542;;
1543; Macro for implementing a multiplication operations.
1544;
1545; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1546; 32-bit system where the 64-bit accesses requires hand coding.
1547;
1548; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1549; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1550; pointer to eflags in A3.
1551;
1552; The functions all return 0 so the caller can be used for div/idiv as well as
1553; for the mul/imul implementation.
1554;
1555; @param 1 The instruction mnemonic.
1556; @param 2 The modified flags.
1557; @param 3 The undefined flags.
1558; @param 4 Name suffix.
1559; @param 5 EFLAGS behaviour: 0 for native, 1 for intel and 2 for AMD.
1560;
1561; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1562;
%macro IEMIMPL_MUL_OP 5
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %4, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]                ; al = low byte of *pu16AX.
        %1      A1_8                    ; ax = al * operand.
        mov     [A0], ax                ; Store the full 16-bit product/result back.
 %if %5 != 1
        IEM_SAVE_FLAGS A2, %2, %3
 %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %2, X86_EFL_AF | X86_EFL_ZF, ax, 8, xAX
 %endif
        xor     eax, eax                ; Return 0 (shared return convention with div/idiv).
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8 %+ %4

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %4, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; A1 overlaps (e/r)dx which the mul clobbers, so
        %1      A2_16                   ; stash the pointer in T1 first.
        mov     [A0], ax
        mov     [T1], dx
 %endif
 %if %5 != 1
        IEM_SAVE_FLAGS A3, %2, %3
 %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, ax, 16, xAX
 %endif
        xor     eax, eax
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16 %+ %4

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %4, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; A1 overlaps (e/r)dx which the mul clobbers.
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
 %if %5 != 1
        IEM_SAVE_FLAGS A3, %2, %3
 %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, eax, 32, xAX
 %endif
        xor     eax, eax
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32 %+ %4

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %4, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; A1 overlaps rdx which the mul clobbers.
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
  %if %5 != 1
        IEM_SAVE_FLAGS A3, %2, %3
  %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, rax, 64, xAX
  %endif
        xor     eax, eax
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64 %+ %4
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), ,       0
IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), 0,                                                   _intel, 1
IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), 0,                                                   _amd,   2
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), ,       0
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), 0,                                                   _intel, 1
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), 0,                                                   _amd,   2
1659
1660
BEGINCODE
;;
; Worker function for negating the 64-bit value held in the 32-bit
; register pair T1:T0 (used by the 32-bit host idiv overflow checks).
; @uses None (T0,T1)
BEGINPROC iemAImpl_negate_T0_T1_u32
        ; Classic two's complement: invert both halves, then add one and
        ; let the carry ripple from the low half into the high half.
        not     T0_32
        not     T1_32
        add     T0_32, 1
        adc     T1_32, 0
        ret
ENDPROC iemAImpl_negate_T0_T1_u32
1675
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating the 128-bit value held in the 64-bit
; register pair T1:T0 (used by the 64-bit host idiv overflow checks).
; @uses None (T0,T1)
BEGINPROC iemAImpl_negate_T0_T1_u64
        ; Classic two's complement: invert both halves, then add one and
        ; let the carry ripple from the low half into the high half.
        not     T0
        not     T1
        add     T0, 1
        adc     T1, 0
        ret
ENDPROC iemAImpl_negate_T0_T1_u64
%endif
1691
1692
1693;;
1694; Macro for implementing a division operations.
1695;
1696; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1697; 32-bit system where the 64-bit accesses requires hand coding.
1698;
1699; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1700; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1701; pointer to eflags in A3.
1702;
1703; The functions all return 0 on success and -1 if a divide error should be
1704; raised by the caller.
1705;
1706; @param 1 The instruction mnemonic.
1707; @param 2 The modified flags.
1708; @param 3 The undefined flags.
1709; @param 4 1 if signed, 0 if unsigned.
1710; @param 5 Function suffix.
1711; @param 6 EFLAGS variation: 0 for native, 1 for intel (ignored),
1712; 2 for AMD (set AF, clear PF, ZF and SF).
1713;
1714; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1715;
%macro IEMIMPL_DIV_OP 6
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %5, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; #DE if the high byte of the dividend >= divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]                ; ax = *pu16AX (dividend).
        %1      A1_8
        mov     [A0], ax                ; Store quotient (al) and remainder (ah) back.
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A2, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A2, %2, %3
 %endif
        xor     eax, eax                ; Return 0 = success.

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; Return -1 = caller must raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8 %+ %5

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %5, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; #DE if the high word of the dividend >= divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A1]             ; Assemble the 32-bit dividend in T0 ...
        shl     T0_32, 16               ; ... high word first, ...
        mov     T0_16, [A0]             ; T0 = dividend (... then the low word).
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor out of the way of dx:ax.
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax                ; Store quotient ...
        mov     [A1], dx                ; ... and remainder.
 %else
        mov     T1, A1                  ; A1 overlaps (e/r)dx.
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A3, %2, %3
 %endif
        xor     eax, eax                ; Return 0 = success.

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; Return -1 = caller must raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16 %+ %5

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %5, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we modify it (we out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u32)
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u32)
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]               ; NOTE(review): looks redundant - eax is reloaded in both branches below; confirm.
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor out of the way of edx:eax.
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax               ; Store quotient ...
        mov     [A1], edx               ; ... and remainder.
 %else
        mov     T1, A1                  ; A1 overlaps (e/r)dx.
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A3, %2, %3
 %endif
        xor     eax, eax                ; Return 0 = success.

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; Balance the divisor push from the signed check.
 %endif
.div_zero:
        mov     eax, -1                 ; Return -1 = caller must raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32 %+ %5

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %5, 20
        PROLOGUE_4_ARGS

        test    A2, A2
        jz      .div_zero
  %if %4 == 0
        cmp     [A1], A2
        jae     .div_overflow
  %else
        push    A2                      ; save A2 so we modify it (we out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u64)
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u64)
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]               ; NOTE(review): looks redundant - rax is reloaded in both branches below; confirm.
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor out of the way of rdx:rax.
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax               ; Store quotient ...
        mov     [A1], rdx               ; ... and remainder.
  %else
        mov     T1, A1                  ; A1 overlaps rdx.
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
  %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
  %else
        IEM_SAVE_FLAGS A3, %2, %3
  %endif
        xor     eax, eax                ; Return 0 = success.

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
  %if %4 != 0
        pop     A2                      ; Balance the divisor push from the signed check.
  %endif
.div_zero:
        mov     eax, -1                 ; Return -1 = caller must raise #DE.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64 %+ %5
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, ,       0
IEMIMPL_DIV_OP div,  0, 0,                                                                             0, _intel, 1
IEMIMPL_DIV_OP div,  0, 0,                                                                             0, _amd,   2
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1, ,       0
IEMIMPL_DIV_OP idiv, 0, 0,                                                                             1, _intel, 1
IEMIMPL_DIV_OP idiv, 0, 0,                                                                             1, _amd,   2
2040
2041
2042;;
2043; Macro for implementing memory fence operation.
2044;
2045; No return value, no operands or anything.
2046;
2047; @param 1 The instruction.
2048;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1                              ; Just execute the fence instruction itself.
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEM_FENCE lfence
IEMIMPL_MEM_FENCE sfence
IEMIMPL_MEM_FENCE mfence
2060
2061;;
2062; Alternative for non-SSE2 host.
2063;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        push    xAX                     ; Scratch stack slot.
        xchg    xAX, [xSP]              ; xchg with a memory operand is implicitly locked,
                                        ; giving a full fence on pre-SSE2 CPUs; this also
                                        ; restores the original xAX value.
        add     xSP, xCB                ; Drop the scratch slot.
        ret
ENDPROC iemAImpl_alt_mem_fence
2070
2071
2072;;
2073; Initialize the FPU for the actual instruction being emulated, this means
2074; loading parts of the guest's control word and status word.
2075;
2076; @uses 24 bytes of stack.
2077; @param 1 Expression giving the address of the FXSTATE of the guest.
2078;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]                   ; Start from the host FPU environment.

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK ; Only the guest mask/PC/RC bits.
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK      ; Guest condition code bits ...
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK    ; ... merged with the host TOP field.
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]                   ; Activate the merged environment.
%endmacro
2097
2098
2099;;
2100; Need to move this as well somewhere better?
2101;
struc IEMFPURESULT
    .r80Result  resw 5                  ; The 80-bit (10 byte) floating point result.
    .FSW        resw 1                  ; The output FPU status word.
endstruc
2106
2107
2108;;
2109; Need to move this as well somewhere better?
2110;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; The first 80-bit floating point result.
    .FSW        resw 1                  ; The output FPU status word.
    .r80Result2 resw 5                  ; The second 80-bit floating point result.
endstruc
2116
2117
2118;
2119;---------------------- 16-bit signed integer operations ----------------------
2120;
2121
2122
;;
; Converts a 16-bit signed integer value to an 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i16, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch area for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (fnstenv/fldenv).

        fninit                          ; Start with a clean FPU state.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]               ; ST(0) = (80-bit fp)*pi16Val.

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear pending exceptions so fstp doesn't trap.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave with a clean FPU state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_r80_from_i16
2146
2147
2148;;
2149; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
2150;
2151; @param A0 FPU context (fxsave).
2152; @param A1 Where to return the output FSW.
2153; @param A2 Where to store the 16-bit signed integer value.
2154; @param A3 Pointer to the 80-bit value.
2155;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch area for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (fnstenv/fldenv).

        fninit                          ; Start with a clean FPU state.
        fld     tword [A3]              ; ST(0) = the 80-bit input value.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]               ; *pi16Dst = ST(0), rounded per the guest FCW RC bits.

        fnstsw  word [A1]               ; Return the resulting status word.

        fninit                          ; Leave with a clean FPU state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
2171
2172
2173;;
2174; Store a 80-bit floating point value (register) as a 16-bit signed integer
2175; (memory) with truncation.
2176;
2177; @param A0 FPU context (fxsave).
2178; @param A1 Where to return the output FSW.
2179; @param A2 Where to store the 16-bit signed integer value.
2180; @param A3 Pointer to the 80-bit value.
2181;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch area for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (fnstenv/fldenv).

        fninit                          ; Start with a clean FPU state.
        fld     tword [A3]              ; ST(0) = the 80-bit input value.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; *pi16Dst = ST(0) truncated toward zero (SSE3 FISTTP,
                                        ; ignores the FCW rounding control).

        fnstsw  word [A1]               ; Return the resulting status word.

        fninit                          ; Leave with a clean FPU state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
2197
2198
2199;;
2200; FPU instruction working on one 80-bit and one 16-bit signed integer value.
2201;
2202; @param 1 The instruction
2203;
2204; @param A0 FPU context (fxsave).
2205; @param A1 Pointer to a IEMFPURESULT for the output.
2206; @param A2 Pointer to the 80-bit value.
2207; @param A3 Pointer to the 16-bit value.
2208;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch area for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (fnstenv/fldenv).

        fninit                          ; Start with a clean FPU state.
        fld     tword [A2]              ; ST(0) = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]               ; ST(0) op= the 16-bit integer memory operand.

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear pending exceptions so fstp doesn't trap.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave with a clean FPU state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
2235
2236
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value,
; only returning FSW.
;
; @param 1      The instruction
;
; @param A0     FPU context (fxsave).
; @param A1     Where to store the output FSW.
; @param A2     Pointer to the 80-bit value.
; @param A3     Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch area for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (fnstenv/fldenv).

        fninit                          ; Start with a clean FPU state.
        fld     tword [A2]              ; ST(0) = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]               ; Compare against the 16-bit integer memory operand.

        fnstsw  word [A1]               ; Only the status word is returned.

        fninit                          ; Leave with a clean FPU state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
2267
2268
2269
2270;
2271;---------------------- 32-bit signed integer operations ----------------------
2272;
2273
2274
2275;;
2276; Converts a 32-bit floating point value to a 80-bit one (fpu register).
2277;
2278; @param A0 FPU context (fxsave).
2279; @param A1 Pointer to a IEMFPURESULT for the output.
2280; @param A2 Pointer to the 32-bit floating point value to convert.
2281;
2282BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i32, 12
2283 PROLOGUE_3_ARGS
2284 sub xSP, 20h
2285
2286 fninit
2287 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2288 fild dword [A2]
2289
2290 fnstsw word [A1 + IEMFPURESULT.FSW]
2291 fnclex
2292 fstp tword [A1 + IEMFPURESULT.r80Result]
2293
2294 fninit
2295 add xSP, 20h
2296 EPILOGUE_3_ARGS
2297ENDPROC iemAImpl_fild_r80_from_i32
2298
2299
2300;;
2301; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
2302;
2303; @param A0 FPU context (fxsave).
2304; @param A1 Where to return the output FSW.
2305; @param A2 Where to store the 32-bit signed integer value.
2306; @param A3 Pointer to the 80-bit value.
2307;
2308BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
2309 PROLOGUE_4_ARGS
2310 sub xSP, 20h
2311
2312 fninit
2313 fld tword [A3]
2314 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2315 fistp dword [A2]
2316
2317 fnstsw word [A1]
2318
2319 fninit
2320 add xSP, 20h
2321 EPILOGUE_4_ARGS
2322ENDPROC iemAImpl_fist_r80_to_i32
2323
2324
2325;;
2326; Store a 80-bit floating point value (register) as a 32-bit signed integer
2327; (memory) with truncation.
2328;
2329; @param A0 FPU context (fxsave).
2330; @param A1 Where to return the output FSW.
2331; @param A2 Where to store the 32-bit signed integer value.
2332; @param A3 Pointer to the 80-bit value.
2333;
2334BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
2335 PROLOGUE_4_ARGS
2336 sub xSP, 20h
2337
2338 fninit
2339 fld tword [A3]
2340 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2341 fisttp dword [A2]
2342
2343 fnstsw word [A1]
2344
2345 fninit
2346 add xSP, 20h
2347 EPILOGUE_4_ARGS
2348ENDPROC iemAImpl_fistt_r80_to_i32
2349
2350
2351;;
2352; FPU instruction working on one 80-bit and one 32-bit signed integer value.
2353;
2354; @param 1 The instruction
2355;
2356; @param A0 FPU context (fxsave).
2357; @param A1 Pointer to a IEMFPURESULT for the output.
2358; @param A2 Pointer to the 80-bit value.
2359; @param A3 Pointer to the 32-bit value.
2360;
2361%macro IEMIMPL_FPU_R80_BY_I32 1
2362BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2363 PROLOGUE_4_ARGS
2364 sub xSP, 20h
2365
2366 fninit
2367 fld tword [A2]
2368 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2369 %1 dword [A3]
2370
2371 fnstsw word [A1 + IEMFPURESULT.FSW]
2372 fnclex
2373 fstp tword [A1 + IEMFPURESULT.r80Result]
2374
2375 fninit
2376 add xSP, 20h
2377 EPILOGUE_4_ARGS
2378ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2379%endmacro
2380
2381IEMIMPL_FPU_R80_BY_I32 fiadd
2382IEMIMPL_FPU_R80_BY_I32 fimul
2383IEMIMPL_FPU_R80_BY_I32 fisub
2384IEMIMPL_FPU_R80_BY_I32 fisubr
2385IEMIMPL_FPU_R80_BY_I32 fidiv
2386IEMIMPL_FPU_R80_BY_I32 fidivr
2387
2388
2389;;
2390; FPU instruction working on one 80-bit and one 32-bit signed integer value,
2391; only returning FSW.
2392;
2393; @param 1 The instruction
2394;
2395; @param A0 FPU context (fxsave).
2396; @param A1 Where to store the output FSW.
2397; @param A2 Pointer to the 80-bit value.
2398; @param A3 Pointer to the 64-bit value.
2399;
2400%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
2401BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2402 PROLOGUE_4_ARGS
2403 sub xSP, 20h
2404
2405 fninit
2406 fld tword [A2]
2407 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2408 %1 dword [A3]
2409
2410 fnstsw word [A1]
2411
2412 fninit
2413 add xSP, 20h
2414 EPILOGUE_4_ARGS
2415ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2416%endmacro
2417
2418IEMIMPL_FPU_R80_BY_I32_FSW ficom
2419
2420
2421
2422;
2423;---------------------- 64-bit signed integer operations ----------------------
2424;
2425
2426
2427;;
2428; Converts a 64-bit floating point value to a 80-bit one (fpu register).
2429;
2430; @param A0 FPU context (fxsave).
2431; @param A1 Pointer to a IEMFPURESULT for the output.
2432; @param A2 Pointer to the 64-bit floating point value to convert.
2433;
2434BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i64, 12
2435 PROLOGUE_3_ARGS
2436 sub xSP, 20h
2437
2438 fninit
2439 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2440 fild qword [A2]
2441
2442 fnstsw word [A1 + IEMFPURESULT.FSW]
2443 fnclex
2444 fstp tword [A1 + IEMFPURESULT.r80Result]
2445
2446 fninit
2447 add xSP, 20h
2448 EPILOGUE_3_ARGS
2449ENDPROC iemAImpl_fild_r80_from_i64
2450
2451
2452;;
2453; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
2454;
2455; @param A0 FPU context (fxsave).
2456; @param A1 Where to return the output FSW.
2457; @param A2 Where to store the 64-bit signed integer value.
2458; @param A3 Pointer to the 80-bit value.
2459;
2460BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
2461 PROLOGUE_4_ARGS
2462 sub xSP, 20h
2463
2464 fninit
2465 fld tword [A3]
2466 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2467 fistp qword [A2]
2468
2469 fnstsw word [A1]
2470
2471 fninit
2472 add xSP, 20h
2473 EPILOGUE_4_ARGS
2474ENDPROC iemAImpl_fist_r80_to_i64
2475
2476
2477;;
2478; Store a 80-bit floating point value (register) as a 64-bit signed integer
2479; (memory) with truncation.
2480;
2481; @param A0 FPU context (fxsave).
2482; @param A1 Where to return the output FSW.
2483; @param A2 Where to store the 64-bit signed integer value.
2484; @param A3 Pointer to the 80-bit value.
2485;
2486BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
2487 PROLOGUE_4_ARGS
2488 sub xSP, 20h
2489
2490 fninit
2491 fld tword [A3]
2492 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2493 fisttp qword [A2]
2494
2495 fnstsw word [A1]
2496
2497 fninit
2498 add xSP, 20h
2499 EPILOGUE_4_ARGS
2500ENDPROC iemAImpl_fistt_r80_to_i64
2501
2502
2503
2504;
2505;---------------------- 32-bit floating point operations ----------------------
2506;
2507
2508;;
2509; Converts a 32-bit floating point value to a 80-bit one (fpu register).
2510;
2511; @param A0 FPU context (fxsave).
2512; @param A1 Pointer to a IEMFPURESULT for the output.
2513; @param A2 Pointer to the 32-bit floating point value to convert.
2514;
2515BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r32, 12
2516 PROLOGUE_3_ARGS
2517 sub xSP, 20h
2518
2519 fninit
2520 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2521 fld dword [A2]
2522
2523 fnstsw word [A1 + IEMFPURESULT.FSW]
2524 fnclex
2525 fstp tword [A1 + IEMFPURESULT.r80Result]
2526
2527 fninit
2528 add xSP, 20h
2529 EPILOGUE_3_ARGS
2530ENDPROC iemAImpl_fld_r80_from_r32
2531
2532
2533;;
2534; Store a 80-bit floating point value (register) as a 32-bit one (memory).
2535;
2536; @param A0 FPU context (fxsave).
2537; @param A1 Where to return the output FSW.
2538; @param A2 Where to store the 32-bit value.
2539; @param A3 Pointer to the 80-bit value.
2540;
2541BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
2542 PROLOGUE_4_ARGS
2543 sub xSP, 20h
2544
2545 fninit
2546 fld tword [A3]
2547 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2548 fst dword [A2]
2549
2550 fnstsw word [A1]
2551
2552 fninit
2553 add xSP, 20h
2554 EPILOGUE_4_ARGS
2555ENDPROC iemAImpl_fst_r80_to_r32
2556
2557
2558;;
2559; FPU instruction working on one 80-bit and one 32-bit floating point value.
2560;
2561; @param 1 The instruction
2562;
2563; @param A0 FPU context (fxsave).
2564; @param A1 Pointer to a IEMFPURESULT for the output.
2565; @param A2 Pointer to the 80-bit value.
2566; @param A3 Pointer to the 32-bit value.
2567;
2568%macro IEMIMPL_FPU_R80_BY_R32 1
2569BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2570 PROLOGUE_4_ARGS
2571 sub xSP, 20h
2572
2573 fninit
2574 fld tword [A2]
2575 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2576 %1 dword [A3]
2577
2578 fnstsw word [A1 + IEMFPURESULT.FSW]
2579 fnclex
2580 fstp tword [A1 + IEMFPURESULT.r80Result]
2581
2582 fninit
2583 add xSP, 20h
2584 EPILOGUE_4_ARGS
2585ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2586%endmacro
2587
2588IEMIMPL_FPU_R80_BY_R32 fadd
2589IEMIMPL_FPU_R80_BY_R32 fmul
2590IEMIMPL_FPU_R80_BY_R32 fsub
2591IEMIMPL_FPU_R80_BY_R32 fsubr
2592IEMIMPL_FPU_R80_BY_R32 fdiv
2593IEMIMPL_FPU_R80_BY_R32 fdivr
2594
2595
2596;;
2597; FPU instruction working on one 80-bit and one 32-bit floating point value,
2598; only returning FSW.
2599;
2600; @param 1 The instruction
2601;
2602; @param A0 FPU context (fxsave).
2603; @param A1 Where to store the output FSW.
2604; @param A2 Pointer to the 80-bit value.
2605; @param A3 Pointer to the 64-bit value.
2606;
2607%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
2608BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2609 PROLOGUE_4_ARGS
2610 sub xSP, 20h
2611
2612 fninit
2613 fld tword [A2]
2614 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2615 %1 dword [A3]
2616
2617 fnstsw word [A1]
2618
2619 fninit
2620 add xSP, 20h
2621 EPILOGUE_4_ARGS
2622ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2623%endmacro
2624
2625IEMIMPL_FPU_R80_BY_R32_FSW fcom
2626
2627
2628
2629;
2630;---------------------- 64-bit floating point operations ----------------------
2631;
2632
2633;;
2634; Converts a 64-bit floating point value to a 80-bit one (fpu register).
2635;
2636; @param A0 FPU context (fxsave).
2637; @param A1 Pointer to a IEMFPURESULT for the output.
2638; @param A2 Pointer to the 64-bit floating point value to convert.
2639;
2640BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r64, 12
2641 PROLOGUE_3_ARGS
2642 sub xSP, 20h
2643
2644 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2645 fld qword [A2]
2646
2647 fnstsw word [A1 + IEMFPURESULT.FSW]
2648 fnclex
2649 fstp tword [A1 + IEMFPURESULT.r80Result]
2650
2651 fninit
2652 add xSP, 20h
2653 EPILOGUE_3_ARGS
2654ENDPROC iemAImpl_fld_r80_from_r64
2655
2656
2657;;
2658; Store a 80-bit floating point value (register) as a 64-bit one (memory).
2659;
2660; @param A0 FPU context (fxsave).
2661; @param A1 Where to return the output FSW.
2662; @param A2 Where to store the 64-bit value.
2663; @param A3 Pointer to the 80-bit value.
2664;
2665BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
2666 PROLOGUE_4_ARGS
2667 sub xSP, 20h
2668
2669 fninit
2670 fld tword [A3]
2671 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2672 fst qword [A2]
2673
2674 fnstsw word [A1]
2675
2676 fninit
2677 add xSP, 20h
2678 EPILOGUE_4_ARGS
2679ENDPROC iemAImpl_fst_r80_to_r64
2680
2681
2682;;
2683; FPU instruction working on one 80-bit and one 64-bit floating point value.
2684;
2685; @param 1 The instruction
2686;
2687; @param A0 FPU context (fxsave).
2688; @param A1 Pointer to a IEMFPURESULT for the output.
2689; @param A2 Pointer to the 80-bit value.
2690; @param A3 Pointer to the 64-bit value.
2691;
2692%macro IEMIMPL_FPU_R80_BY_R64 1
2693BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2694 PROLOGUE_4_ARGS
2695 sub xSP, 20h
2696
2697 fninit
2698 fld tword [A2]
2699 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2700 %1 qword [A3]
2701
2702 fnstsw word [A1 + IEMFPURESULT.FSW]
2703 fnclex
2704 fstp tword [A1 + IEMFPURESULT.r80Result]
2705
2706 fninit
2707 add xSP, 20h
2708 EPILOGUE_4_ARGS
2709ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2710%endmacro
2711
2712IEMIMPL_FPU_R80_BY_R64 fadd
2713IEMIMPL_FPU_R80_BY_R64 fmul
2714IEMIMPL_FPU_R80_BY_R64 fsub
2715IEMIMPL_FPU_R80_BY_R64 fsubr
2716IEMIMPL_FPU_R80_BY_R64 fdiv
2717IEMIMPL_FPU_R80_BY_R64 fdivr
2718
2719;;
2720; FPU instruction working on one 80-bit and one 64-bit floating point value,
2721; only returning FSW.
2722;
2723; @param 1 The instruction
2724;
2725; @param A0 FPU context (fxsave).
2726; @param A1 Where to store the output FSW.
2727; @param A2 Pointer to the 80-bit value.
2728; @param A3 Pointer to the 64-bit value.
2729;
2730%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
2731BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2732 PROLOGUE_4_ARGS
2733 sub xSP, 20h
2734
2735 fninit
2736 fld tword [A2]
2737 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2738 %1 qword [A3]
2739
2740 fnstsw word [A1]
2741
2742 fninit
2743 add xSP, 20h
2744 EPILOGUE_4_ARGS
2745ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2746%endmacro
2747
2748IEMIMPL_FPU_R80_BY_R64_FSW fcom
2749
2750
2751
2752;
2753;---------------------- 80-bit floating point operations ----------------------
2754;
2755
2756;;
2757; Loads a 80-bit floating point register value from memory.
2758;
2759; @param A0 FPU context (fxsave).
2760; @param A1 Pointer to a IEMFPURESULT for the output.
2761; @param A2 Pointer to the 80-bit floating point value to load.
2762;
2763BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2764 PROLOGUE_3_ARGS
2765 sub xSP, 20h
2766
2767 fninit
2768 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2769 fld tword [A2]
2770
2771 fnstsw word [A1 + IEMFPURESULT.FSW]
2772 fnclex
2773 fstp tword [A1 + IEMFPURESULT.r80Result]
2774
2775 fninit
2776 add xSP, 20h
2777 EPILOGUE_3_ARGS
2778ENDPROC iemAImpl_fld_r80_from_r80
2779
2780
2781;;
2782; Store a 80-bit floating point register to memory
2783;
2784; @param A0 FPU context (fxsave).
2785; @param A1 Where to return the output FSW.
2786; @param A2 Where to store the 80-bit value.
2787; @param A3 Pointer to the 80-bit register value.
2788;
2789BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2790 PROLOGUE_4_ARGS
2791 sub xSP, 20h
2792
2793 fninit
2794 fld tword [A3]
2795 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2796 fstp tword [A2]
2797
2798 fnstsw word [A1]
2799
2800 fninit
2801 add xSP, 20h
2802 EPILOGUE_4_ARGS
2803ENDPROC iemAImpl_fst_r80_to_r80
2804
2805
2806;;
2807; Loads an 80-bit floating point register value in BCD format from memory.
2808;
2809; @param A0 FPU context (fxsave).
2810; @param A1 Pointer to a IEMFPURESULT for the output.
2811; @param A2 Pointer to the 80-bit BCD value to load.
2812;
2813BEGINPROC_FASTCALL iemAImpl_fld_r80_from_d80, 12
2814 PROLOGUE_3_ARGS
2815 sub xSP, 20h
2816
2817 fninit
2818 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2819 fbld tword [A2]
2820
2821 fnstsw word [A1 + IEMFPURESULT.FSW]
2822 fnclex
2823 fstp tword [A1 + IEMFPURESULT.r80Result]
2824
2825 fninit
2826 add xSP, 20h
2827 EPILOGUE_3_ARGS
2828ENDPROC iemAImpl_fld_r80_from_d80
2829
2830
2831;;
2832; Store a 80-bit floating point register to memory as BCD
2833;
2834; @param A0 FPU context (fxsave).
2835; @param A1 Where to return the output FSW.
2836; @param A2 Where to store the 80-bit BCD value.
2837; @param A3 Pointer to the 80-bit register value.
2838;
2839BEGINPROC_FASTCALL iemAImpl_fst_r80_to_d80, 16
2840 PROLOGUE_4_ARGS
2841 sub xSP, 20h
2842
2843 fninit
2844 fld tword [A3]
2845 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2846 fbstp tword [A2]
2847
2848 fnstsw word [A1]
2849
2850 fninit
2851 add xSP, 20h
2852 EPILOGUE_4_ARGS
2853ENDPROC iemAImpl_fst_r80_to_d80
2854
2855
2856;;
2857; FPU instruction working on two 80-bit floating point values.
2858;
2859; @param 1 The instruction
2860;
2861; @param A0 FPU context (fxsave).
2862; @param A1 Pointer to a IEMFPURESULT for the output.
2863; @param A2 Pointer to the first 80-bit value (ST0)
2864; @param A3 Pointer to the second 80-bit value (STn).
2865;
2866%macro IEMIMPL_FPU_R80_BY_R80 2
2867BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2868 PROLOGUE_4_ARGS
2869 sub xSP, 20h
2870
2871 fninit
2872 fld tword [A3]
2873 fld tword [A2]
2874 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2875 %1 %2
2876
2877 fnstsw word [A1 + IEMFPURESULT.FSW]
2878 fnclex
2879 fstp tword [A1 + IEMFPURESULT.r80Result]
2880
2881 fninit
2882 add xSP, 20h
2883 EPILOGUE_4_ARGS
2884ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2885%endmacro
2886
2887IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2888IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2889IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2890IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2891IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2892IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2893IEMIMPL_FPU_R80_BY_R80 fprem, {}
2894IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2895IEMIMPL_FPU_R80_BY_R80 fscale, {}
2896
2897
2898;;
2899; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2900; storing the result in ST1 and popping the stack.
2901;
2902; @param 1 The instruction
2903;
2904; @param A0 FPU context (fxsave).
2905; @param A1 Pointer to a IEMFPURESULT for the output.
2906; @param A2 Pointer to the first 80-bit value (ST1).
2907; @param A3 Pointer to the second 80-bit value (ST0).
2908;
2909%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2910BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2911 PROLOGUE_4_ARGS
2912 sub xSP, 20h
2913
2914 fninit
2915 fld tword [A2]
2916 fld tword [A3]
2917 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2918 %1
2919
2920 fnstsw word [A1 + IEMFPURESULT.FSW]
2921 fnclex
2922 fstp tword [A1 + IEMFPURESULT.r80Result]
2923
2924 fninit
2925 add xSP, 20h
2926 EPILOGUE_4_ARGS
2927ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2928%endmacro
2929
2930IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
2931IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
2932IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2933
2934
2935;;
2936; FPU instruction working on two 80-bit floating point values, only
2937; returning FSW.
2938;
2939; @param 1 The instruction
2940;
2941; @param A0 FPU context (fxsave).
2942; @param A1 Pointer to a uint16_t for the resulting FSW.
2943; @param A2 Pointer to the first 80-bit value.
2944; @param A3 Pointer to the second 80-bit value.
2945;
2946%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2947BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2948 PROLOGUE_4_ARGS
2949 sub xSP, 20h
2950
2951 fninit
2952 fld tword [A3]
2953 fld tword [A2]
2954 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2955 %1 st0, st1
2956
2957 fnstsw word [A1]
2958
2959 fninit
2960 add xSP, 20h
2961 EPILOGUE_4_ARGS
2962ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2963%endmacro
2964
2965IEMIMPL_FPU_R80_BY_R80_FSW fcom
2966IEMIMPL_FPU_R80_BY_R80_FSW fucom
2967
2968
2969;;
2970; FPU instruction working on two 80-bit floating point values,
2971; returning FSW and EFLAGS (eax).
2972;
2973; @param 1 The instruction
2974;
2975; @returns EFLAGS in EAX.
2976; @param A0 FPU context (fxsave).
2977; @param A1 Pointer to a uint16_t for the resulting FSW.
2978; @param A2 Pointer to the first 80-bit value.
2979; @param A3 Pointer to the second 80-bit value.
2980;
2981%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2982BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2983 PROLOGUE_4_ARGS
2984 sub xSP, 20h
2985
2986 fninit
2987 fld tword [A3]
2988 fld tword [A2]
2989 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2990 %1 st1
2991
2992 fnstsw word [A1]
2993 pushf
2994 pop xAX
2995
2996 fninit
2997 add xSP, 20h
2998 EPILOGUE_4_ARGS
2999ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
3000%endmacro
3001
3002IEMIMPL_FPU_R80_BY_R80_EFL fcomi
3003IEMIMPL_FPU_R80_BY_R80_EFL fucomi
3004
3005
3006;;
3007; FPU instruction working on one 80-bit floating point value.
3008;
3009; @param 1 The instruction
3010;
3011; @param A0 FPU context (fxsave).
3012; @param A1 Pointer to a IEMFPURESULT for the output.
3013; @param A2 Pointer to the 80-bit value.
3014;
3015%macro IEMIMPL_FPU_R80 1
3016BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
3017 PROLOGUE_3_ARGS
3018 sub xSP, 20h
3019
3020 fninit
3021 fld tword [A2]
3022 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
3023 %1
3024
3025 fnstsw word [A1 + IEMFPURESULT.FSW]
3026 fnclex
3027 fstp tword [A1 + IEMFPURESULT.r80Result]
3028
3029 fninit
3030 add xSP, 20h
3031 EPILOGUE_3_ARGS
3032ENDPROC iemAImpl_ %+ %1 %+ _r80
3033%endmacro
3034
3035IEMIMPL_FPU_R80 fchs
3036IEMIMPL_FPU_R80 fabs
3037IEMIMPL_FPU_R80 f2xm1
3038IEMIMPL_FPU_R80 fsqrt
3039IEMIMPL_FPU_R80 frndint
3040IEMIMPL_FPU_R80 fsin
3041IEMIMPL_FPU_R80 fcos
3042
3043
3044;;
3045; FPU instruction working on one 80-bit floating point value, only
3046; returning FSW.
3047;
3048; @param 1 The instruction
3049;
3050; @param A0 FPU context (fxsave).
3051; @param A1 Pointer to a uint16_t for the resulting FSW.
3052; @param A2 Pointer to the 80-bit value.
3053;
3054%macro IEMIMPL_FPU_R80_FSW 1
3055BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
3056 PROLOGUE_3_ARGS
3057 sub xSP, 20h
3058
3059 fninit
3060 fld tword [A2]
3061 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
3062 %1
3063
3064 fnstsw word [A1]
3065
3066 fninit
3067 add xSP, 20h
3068 EPILOGUE_3_ARGS
3069ENDPROC iemAImpl_ %+ %1 %+ _r80
3070%endmacro
3071
3072IEMIMPL_FPU_R80_FSW ftst
3073IEMIMPL_FPU_R80_FSW fxam
3074
3075
3076
3077;;
3078; FPU instruction loading a 80-bit floating point constant.
3079;
3080; @param 1 The instruction
3081;
3082; @param A0 FPU context (fxsave).
3083; @param A1 Pointer to a IEMFPURESULT for the output.
3084;
3085%macro IEMIMPL_FPU_R80_CONST 1
3086BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
3087 PROLOGUE_2_ARGS
3088 sub xSP, 20h
3089
3090 fninit
3091 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
3092 %1
3093
3094 fnstsw word [A1 + IEMFPURESULT.FSW]
3095 fnclex
3096 fstp tword [A1 + IEMFPURESULT.r80Result]
3097
3098 fninit
3099 add xSP, 20h
3100 EPILOGUE_2_ARGS
3101ENDPROC iemAImpl_ %+ %1 %+
3102%endmacro
3103
3104IEMIMPL_FPU_R80_CONST fld1
3105IEMIMPL_FPU_R80_CONST fldl2t
3106IEMIMPL_FPU_R80_CONST fldl2e
3107IEMIMPL_FPU_R80_CONST fldpi
3108IEMIMPL_FPU_R80_CONST fldlg2
3109IEMIMPL_FPU_R80_CONST fldln2
3110IEMIMPL_FPU_R80_CONST fldz
3111
3112
3113;;
3114; FPU instruction working on one 80-bit floating point value, outputing two.
3115;
3116; @param 1 The instruction
3117;
3118; @param A0 FPU context (fxsave).
3119; @param A1 Pointer to a IEMFPURESULTTWO for the output.
3120; @param A2 Pointer to the 80-bit value.
3121;
3122%macro IEMIMPL_FPU_R80_R80 1
3123BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
3124 PROLOGUE_3_ARGS
3125 sub xSP, 20h
3126
3127 fninit
3128 fld tword [A2]
3129 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
3130 %1
3131
3132 fnstsw word [A1 + IEMFPURESULTTWO.FSW]
3133 fnclex
3134 fstp tword [A1 + IEMFPURESULTTWO.r80Result2]
3135 fnclex
3136 fstp tword [A1 + IEMFPURESULTTWO.r80Result1]
3137
3138 fninit
3139 add xSP, 20h
3140 EPILOGUE_3_ARGS
3141ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
3142%endmacro
3143
3144IEMIMPL_FPU_R80_R80 fptan
3145IEMIMPL_FPU_R80_R80 fxtract
3146IEMIMPL_FPU_R80_R80 fsincos
3147
3148
3149
3150
3151;---------------------- SSE and MMX Operations ----------------------
3152
;; Placeholder for any host-state save/restore needed around MMX usage.
;; @todo what do we need to do for MMX?
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro
%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;; Placeholder for any host-state save/restore needed around SSE usage.
;; @todo what do we need to do for SSE?
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro
%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro
3164
3165
3166;;
3167; Media instruction working on two full sized registers.
3168;
3169; @param 1 The instruction
3170;
3171; @param A0 FPU context (fxsave).
3172; @param A1 Pointer to the first media register size operand (input/output).
3173; @param A2 Pointer to the second media register size operand (input).
3174;
3175%macro IEMIMPL_MEDIA_F2 1
3176BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
3177 PROLOGUE_3_ARGS
3178 IEMIMPL_MMX_PROLOGUE
3179
3180 movq mm0, [A1]
3181 movq mm1, [A2]
3182 %1 mm0, mm1
3183 movq [A1], mm0
3184
3185 IEMIMPL_MMX_EPILOGUE
3186 EPILOGUE_3_ARGS
3187ENDPROC iemAImpl_ %+ %1 %+ _u64
3188
3189BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
3190 PROLOGUE_3_ARGS
3191 IEMIMPL_SSE_PROLOGUE
3192
3193 movdqu xmm0, [A1]
3194 movdqu xmm1, [A2]
3195 %1 xmm0, xmm1
3196 movdqu [A1], xmm0
3197
3198 IEMIMPL_SSE_EPILOGUE
3199 EPILOGUE_3_ARGS
3200ENDPROC iemAImpl_ %+ %1 %+ _u128
3201%endmacro
3202
3203IEMIMPL_MEDIA_F2 pxor
3204IEMIMPL_MEDIA_F2 pcmpeqb
3205IEMIMPL_MEDIA_F2 pcmpeqw
3206IEMIMPL_MEDIA_F2 pcmpeqd
3207
3208
3209;;
3210; Media instruction working on one full sized and one half sized register (lower half).
3211;
3212; @param 1 The instruction
3213; @param 2 1 if MMX is included, 0 if not.
3214;
3215; @param A0 FPU context (fxsave).
3216; @param A1 Pointer to the first full sized media register operand (input/output).
3217; @param A2 Pointer to the second half sized media register operand (input).
3218;
3219%macro IEMIMPL_MEDIA_F1L1 2
3220 %if %2 != 0
3221BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
3222 PROLOGUE_3_ARGS
3223 IEMIMPL_MMX_PROLOGUE
3224
3225 movq mm0, [A1]
3226 movd mm1, [A2]
3227 %1 mm0, mm1
3228 movq [A1], mm0
3229
3230 IEMIMPL_MMX_EPILOGUE
3231 EPILOGUE_3_ARGS
3232ENDPROC iemAImpl_ %+ %1 %+ _u64
3233 %endif
3234
3235BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
3236 PROLOGUE_3_ARGS
3237 IEMIMPL_SSE_PROLOGUE
3238
3239 movdqu xmm0, [A1]
3240 movq xmm1, [A2]
3241 %1 xmm0, xmm1
3242 movdqu [A1], xmm0
3243
3244 IEMIMPL_SSE_EPILOGUE
3245 EPILOGUE_3_ARGS
3246ENDPROC iemAImpl_ %+ %1 %+ _u128
3247%endmacro
3248
3249IEMIMPL_MEDIA_F1L1 punpcklbw, 1
3250IEMIMPL_MEDIA_F1L1 punpcklwd, 1
3251IEMIMPL_MEDIA_F1L1 punpckldq, 1
3252IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
3253
3254
3255;;
3256; Media instruction working on one full sized and one half sized register (high half).
3257;
3258; @param 1 The instruction
3259; @param 2 1 if MMX is included, 0 if not.
3260;
3261; @param A0 FPU context (fxsave).
3262; @param A1 Pointer to the first full sized media register operand (input/output).
3263; @param A2 Pointer to the second full sized media register operand, where we
3264; will only use the upper half (input).
3265;
3266%macro IEMIMPL_MEDIA_F1H1 2
3267 %if %2 != 0
3268BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
3269 PROLOGUE_3_ARGS
3270 IEMIMPL_MMX_PROLOGUE
3271
3272 movq mm0, [A1]
3273 movq mm1, [A2]
3274 %1 mm0, mm1
3275 movq [A1], mm0
3276
3277 IEMIMPL_MMX_EPILOGUE
3278 EPILOGUE_3_ARGS
3279ENDPROC iemAImpl_ %+ %1 %+ _u64
3280 %endif
3281
3282BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
3283 PROLOGUE_3_ARGS
3284 IEMIMPL_SSE_PROLOGUE
3285
3286 movdqu xmm0, [A1]
3287 movdqu xmm1, [A2]
3288 %1 xmm0, xmm1
3289 movdqu [A1], xmm0
3290
3291 IEMIMPL_SSE_EPILOGUE
3292 EPILOGUE_3_ARGS
3293ENDPROC iemAImpl_ %+ %1 %+ _u128
3294%endmacro
3295
; The high-half unpackers must use IEMIMPL_MEDIA_F1H1 so the full second
; operand is loaded: IEMIMPL_MEDIA_F1L1 only loads its low half (movd/movq),
; leaving the bits punpckh* actually reads as zero.
IEMIMPL_MEDIA_F1H1 punpckhbw, 1
IEMIMPL_MEDIA_F1H1 punpckhwd, 1
IEMIMPL_MEDIA_F1H1 punpckhdq, 1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
3300
3301
3302;
3303; Shufflers with evil 8-bit immediates.
3304;
3305
;;
; pshufw with a run-time imm8: dispatches into a table of 256 5-byte
; "pshufw mm0, mm1, imm8 / ret" stubs, one per immediate value.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the destination operand (input/output).
; @param    A2      Pointer to the source operand (input).
; @param    A3      The 8-bit immediate (shuffle order).
;
BEGINPROC_FASTCALL iemAImpl_pshufw, 16
        PROLOGUE_4_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]
        movq    mm1, [A2]
        lea     T0, [A3 + A3*4]         ; imm8 * 5; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]     ; base of the stub table
        lea     T1, [T1 + T0]           ; T1 = &.imm<A3>
        call    T1                      ; execute the stub for this immediate
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_4_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                ; 256*5 == 0x500
dw 0xfaff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw
3331
3332
;;
; SSE pshufhw/pshuflw/pshufd with a run-time imm8: dispatches into a table
; of 256 6-byte "op xmm0, xmm1, imm8 / ret" stubs, one per immediate value.
;
; @param    1       The instruction (pshufhw, pshuflw or pshufd).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the destination operand (input/output).
; @param    A2      Pointer to the source operand (input).
; @param    A3      The 8-bit immediate (shuffle order).
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
        PROLOGUE_4_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        lea     T1, [.imm0 xWrtRIP]     ; base of the stub table
        lea     T0, [A3 + A3*2]         ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
        lea     T1, [T1 + T0*2]         ; T1 = &.imm<A3>
        call    T1                      ; execute the stub for this immediate
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_4_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
3364
3365
3366;
3367; Move byte mask.
3368;
3369
;;
; pmovmskb: build a mask from the most significant bit of each source byte (MMX).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the 64-bit destination (mask, zero extended).
; @param    A2      Pointer to the 64-bit MMX source register value.
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A2]
        pmovmskb T0, mm1                ; T0 is fully written here, so no need to
                                        ;  preload it from [A1] (dead load removed)
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; 32-bit host: explicitly zero the upper half
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u64
3384
;;
; pmovmskb: build a mask from the most significant bit of each source byte (SSE).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the 64-bit destination (mask, zero extended).
; @param    A2      Pointer to the 128-bit XMM source register value.
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A2]              ; unaligned-safe source load
        pmovmskb T0, xmm1               ; T0 is fully written here, so no need to
                                        ;  preload it from [A1] (dead load removed)
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; 32-bit host: explicitly zero the upper half
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u128
3399
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette