VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 48749

Last change on this file since 48749 was 48127, checked in by vboxsync, 11 years ago

IEM: Started on 64-bit ops on 32-bit hosts.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 79.8 KB
Line 
1; $Id: IEMAllAImpl.asm 48127 2013-08-28 14:48:16Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2012 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.virtualbox.org. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;
; Return from a fastcall function.
;
; Emits 'ret cbArgs' on 32-bit Windows targets and a plain 'ret' on every
; other target.
;
; @param 1      Byte count handed to 'ret' (only used on 32-bit Windows).
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %else
  %ifndef RT_OS_WINDOWS
        ret
  %else
        ret     %1
  %endif
 %endif
%endmacro
44
;;
; Symbol name of a fastcall function.
;
; The default is simply NAME(a_Name).  On 32-bit Windows targets that default
; is replaced by the decorated form: a_Prefix, the name, '@' and the argument
; byte count pasted together.
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;        escaping (or whatever the dollar is good for here).  Thus the ugly
;        prefix argument.
;
%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix)   NAME(a_Name)
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  ; Drop the default again and install the decorated variant.
  %undef  NAME_FASTCALL
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %endif
%endif
59
;;
; Opens a fastcall procedure.
;
; Emits the export/global directives selected by the object format defines
; and then the entry label itself.  The directives use the '$@' prefix while
; the label uses '@' (see the @todo on NAME_FASTCALL).
;
; @param 1      The function name (C).
; @param 2      The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
        export  %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 %ifdef ASM_FORMAT_OMF
  %ifdef __NASM__
        export  NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
        global  NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
80
81
;
; We employ some macro assembly here to hide the calling convention differences.
;
; PROLOGUE_n_ARGS / EPILOGUE_n_ARGS[_EX] bracket a function taking n arguments.
; A0..A3 name the argument registers and T0/T1 the scratch registers, each
; with _32/_16/_8 aliases (there is no A3_8, and no T1_8 on x86).
;
; The _EX epilogues take the byte count for 'ret' on x86.
;
%ifdef RT_ARCH_AMD64
 ;
 ; AMD64: the prologues are empty and every epilogue is a plain 'ret'; the
 ; byte count given to the _EX variants is accepted and ignored.
 ;
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts zero or one parameter: the x86 variant requires the byte count, so
 ; portable callers must be able to pass it here too, while existing callers
 ; that pass nothing keep assembling.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro

 ; Argument registers when ASM_CALL64_GCC is defined.
 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 ; Argument registers when ASM_CALL64_MSC is defined.
 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 ; Scratch registers.
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

%else
 ;
 ; x86: A0/A1 are ecx/edx.  A2 and A3 are ebx and esi, which the 3 and 4
 ; argument prologues load from the stack.  edi doubles as T1, so every
 ; prologue saves it and every epilogue restores it.
 ;
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx
        mov     ebx, [esp + 4 + 4]      ; saved ebx + return address precede the 3rd argument
        push    edi
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 12 + 4 + 0] ; 3 saved registers + return address precede the 3rd argument
        mov     esi, [esp + 12 + 4 + 4] ; the 4th argument follows it
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro

 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
%endif
265
266
;;
; Loads the guest's modified (%2) and undefined (%3) flag bits from [%1] into
; the host EFLAGS; all other host flag bits are left as they are.
;
; The load is currently unconditional: the '%if (%3) != 0' guard that would
; restrict it to operations with undefined flags is disabled.
;
; @remarks  Clobbers T0, stack. Changes EFLAGS.
; @param    1   The parameter (A0..A3) pointing to the eflags.
; @param    2   The set of modified flags.
; @param    3   The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
        pushf                                   ; host flags -> stack slot
        and     dword [xSP], ~(%2 | %3)         ; drop the host's copy of the selected bits
        mov     T0_32, [%1]                     ; fetch the guest flags
        and     T0_32, (%2 | %3)                ; keep only the selected bits
        or      [xSP], T0                       ; combine guest bits with the remaining host bits
        popf                                    ; make the mix the live EFLAGS
%endmacro
286
;;
; Writes the modified (%2) and undefined (%3) flag bits of the current EFLAGS
; back to [%1], leaving all other bits stored there untouched.
;
; Expands to nothing when both masks are zero.
;
; @remarks  Clobbers T0, T1, stack.
; @param    1   The register pointing to the EFLAGS.
; @param    2   The mask of modified flags to save.
; @param    3   The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf                                   ; snapshot EFLAGS before anything below alters them
        mov     T0_32, [%1]                     ; currently stored flags
        and     T0_32, ~(%2 | %3)               ; make room for the fresh bits
        pop     T1                              ; the snapshot taken above
        and     T1_32, (%2 | %3)                ; fresh bits only
        or      T0_32, T1_32                    ; merge
        mov     [%1], T0_32                     ; store the result
 %endif
%endmacro
306
307
;;
; Generator for binary operator workers.
;
; Produces the 8, 16 and 32-bit workers and, on AMD64 only, the 64-bit one.
; When requested, a second set with a LOCK prefix and a '_locked' name suffix
; is produced as well.  64-bit accesses on 32-bit systems require hand coding
; and are not generated here.
;
; Every worker receives a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param 1      The instruction mnemonic.
; @param 2      Non-zero if there should be a locked version.
; @param 3      The modified flags.
; @param 4      The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; emit the LOCK-prefixed twins?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;              instr, lock, modified-flags,                                                                           undefined-flags
IEMIMPL_BIN_OP add,   1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,   1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,   1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,   1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,    1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,   1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,   1,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,   0,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test,  0,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
407
408
;;
; Generator for bit test workers (bt, btc, bts, btr).
;
; Produces the 16 and 32-bit workers and, on AMD64 only, the 64-bit one,
; optionally followed by LOCK-prefixed '_locked' twins.  64-bit accesses on
; 32-bit systems require hand coding and are not generated here.
;
; Every worker receives a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param 1      The instruction mnemonic.
; @param 2      Non-zero if there should be a locked version.
; @param 3      The modified flags.
; @param 4      The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; emit the LOCK-prefixed twins?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;              instr, lock, modified-flags, undefined-flags
IEMIMPL_BIT_OP bt,    0,    (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc,   1,    (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts,   1,    (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr,   1,    (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
485
;;
; Generator for bit search workers (bsf, bsr).
;
; This is the three parameter form of IEMIMPL_BIT_OP; there is no locked
; variant.  It produces the 16 and 32-bit workers and, on AMD64 only, the
; 64-bit one.  64-bit accesses on 32-bit systems require hand coding.
;
; Every worker receives a pointer to the destination operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.  The result is
; computed in T0 and stored to [A0] only when the instruction leaves ZF clear.
;
; @param 1      The instruction mnemonic.
; @param 2      The modified flags.
; @param 3      The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .skip_store                     ; ZF set: leave the destination alone
        mov     [A0], T0_16
.skip_store:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .skip_store                     ; ZF set: leave the destination alone
        mov     [A0], T0_32
.skip_store:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .skip_store                     ; ZF set: leave the destination alone
        mov     [A0], T0
.skip_store:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro

;              instr, modified-flags, undefined-flags
IEMIMPL_BIT_OP bsf,   (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr,   (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
538
539
;
; Two operand IMUL.  Similar to the binary operators, yet different: there is
; no locked form and the product is formed in a register.
; The rDX:rAX variant of imul is handled together with mul further down.
;

;;
; 16-bit two operand IMUL.
;
; A0 points to the first operand and receives the product, A1 holds the
; second operand and A2 points to eflags.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]                ; product lands in the A1 register ...
        mov     [A0], A1_16                     ; ... and is written back through A0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16
553
;;
; 32-bit two operand IMUL.
;
; A0 points to the first operand and receives the product, A1 holds the
; second operand and A2 points to eflags.
;
BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]               ; product lands in the A1 register ...
        mov     [A0], A1_32                     ; ... and is written back through A0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32
562
%ifdef RT_ARCH_AMD64
;;
; 64-bit two operand IMUL (AMD64 only).
;
; A0 points to the first operand and receives the product, A1 holds the
; second operand and A2 points to eflags.
;
BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]                  ; product lands in the A1 register ...
        mov     [A0], A1                        ; ... and is written back through A0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64
%endif ; RT_ARCH_AMD64
573
574
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Each worker takes two arguments: the pointer to the memory operand first,
; then the pointer to the register value.  They all return void.
;

;;
; 8-bit XCHG: swaps the byte at [A0] with the byte at [A1].
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]                      ; register value
        xchg    [A0], T0_8                      ; swap it with the memory operand
        mov     [A1], T0_8                      ; hand back the old memory value
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8
589
;;
; 16-bit XCHG: swaps the word at [A0] with the word at [A1].
;
BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]                     ; register value
        xchg    [A0], T0_16                     ; swap it with the memory operand
        mov     [A1], T0_16                     ; hand back the old memory value
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16
597
;;
; 32-bit XCHG: swaps the dword at [A0] with the dword at [A1].
;
BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]                     ; register value
        xchg    [A0], T0_32                     ; swap it with the memory operand
        mov     [A1], T0_32                     ; hand back the old memory value
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32
605
%ifdef RT_ARCH_AMD64
;;
; 64-bit XCHG (AMD64 only): swaps the qword at [A0] with the qword at [A1].
;
BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]                        ; register value
        xchg    [A0], T0                        ; swap it with the memory operand
        mov     [A1], T0                        ; hand back the old memory value
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64
%endif
615
616
;
; XADD for memory operands.
;
; Each worker takes three arguments: the pointer to the memory/register
; destination, the pointer to the register value and finally a pointer to
; eflags.  They all return void.
;

;;
; 8-bit XADD on [A0] with the register value at [A1]; eflags pointer in A2.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]                      ; register operand
        xadd    [A0], T0_8
        mov     [A1], T0_8                      ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8
634
;;
; 16-bit XADD on [A0] with the register value at [A1]; eflags pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]                     ; register operand
        xadd    [A0], T0_16
        mov     [A1], T0_16                     ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16
644
;;
; 32-bit XADD on [A0] with the register value at [A1]; eflags pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]                     ; register operand
        xadd    [A0], T0_32
        mov     [A1], T0_32                     ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32
654
%ifdef RT_ARCH_AMD64
;;
; 64-bit XADD (AMD64 only) on [A0] with the register value at [A1]; eflags
; pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                        ; register operand
        xadd    [A0], T0
        mov     [A1], T0                        ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64
%endif ; RT_ARCH_AMD64
666
;;
; 8-bit LOCK XADD on [A0] with the register value at [A1]; eflags pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]                      ; register operand
        lock xadd [A0], T0_8
        mov     [A1], T0_8                      ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked
676
;;
; 16-bit LOCK XADD on [A0] with the register value at [A1]; eflags pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]                     ; register operand
        lock xadd [A0], T0_16
        mov     [A1], T0_16                     ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked
686
;;
; 32-bit LOCK XADD on [A0] with the register value at [A1]; eflags pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]                     ; register operand
        lock xadd [A0], T0_32
        mov     [A1], T0_32                     ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked
696
%ifdef RT_ARCH_AMD64
;;
; 64-bit LOCK XADD (AMD64 only) on [A0] with the register value at [A1];
; eflags pointer in A2.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                        ; register operand
        lock xadd [A0], T0
        mov     [A1], T0                        ; write the register operand back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64_locked
%endif ; RT_ARCH_AMD64
708
709
;
; CMPXCHG8B.
;
; The instruction needs eax, ebx, ecx and edx, which collide with the
; argument registers, so the body is written out once per calling convention
; instead of going through the A0..A3 aliases.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
;                                             uint32_t *pEFlags));
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu64Dst, rdx=pu64EaxEdx, r8=pu64EbxEcx, r9=pEFlags.
        push    rbx

        mov     r10, rcx                        ; pu64Dst
        mov     r11, rdx                        ; pu64EaxEdx (is also T1)

        mov     ebx, [r8]
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        ; Store edx:eax back before IEM_SAVE_FLAGS trashes r11.
        mov     [r11], eax
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
        push    rbx

        mov     r11, rdx                        ; pu64EbxEcx (is also T1)
        mov     r10, rcx                        ; pEFlags

        mov     ebx, [r11]
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ; In: ecx=pu64Dst, edx=pu64EaxEdx, stack: pu64EbxEcx, pEFlags.
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     esi, edx                        ; pu64EaxEdx
        mov     edi, ecx                        ; pu64Dst
        mov     ecx, [esp + 16 + 4 + 0]         ; pu64EbxEcx (4 pushes + return address above)
        mov     ebp, [esp + 16 + 4 + 4]         ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b
797
;;
; Locked CMPXCHG8B.
;
; Simply continues in iemAImpl_cmpxchg8b, which applies the LOCK prefix
; unconditionally.
;
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@) ; lazy bird: cmpxchg8b is always lock prefixed
ENDPROC iemAImpl_cmpxchg8b_locked
802
803
804
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; A0 points to the destination, A1 points to the accumulator value (read
; before and written back after the instruction), A2 holds the register
; operand and A3 points to eflags.  In the 32-bit host build of the _u64
; worker the third argument is a pointer to the 64-bit register value.
;
; @param 1      Instruction prefix: empty or 'lock'.
; @param 2      Function name suffix: empty or '_locked'.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ; It is always lock prefixed, regardless of the macro's prefix argument.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                        ; pu64Dst
        mov     esi, edx                        ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0]         ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4]         ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF is set when the comparison matched and clear when it did not, so
        ; only the ZF=0 case may take the mismatch path; the other way round
        ; every failed compare would be reported with ZF=1.
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                        ; matched: produce the flags of an equal comparison.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        ; edx:eax now holds the memory value while [esi] still holds the old
        ; accumulator; compare the high dwords first and the low dwords only
        ; when the high ones match.
        cmp     [esi + 4], edx          ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
902
;;
; Generator for unary operator workers.
;
; Produces plain and LOCK-prefixed ('_locked') workers for 8, 16 and 32-bit
; operands and, on AMD64 only, for 64-bit operands.  64-bit accesses on
; 32-bit systems require hand coding and are not generated here.
;
; Every worker receives a pointer to the destination memory operand in A0
; and a pointer to eflags in A1.
;
; @param 1      The instruction mnemonic.
; @param 2      The modified flags.
; @param 3      The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif ; RT_ARCH_AMD64

%endmacro

;                instr, modified-flags, undefined-flags
IEMIMPL_UNARY_OP inc,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not,   0, 0
991
992
;;
; Generator for memory fence workers.
;
; Each worker is named iemAImpl_<instruction>, takes no operands, returns
; nothing and consists of the fence instruction followed by 'ret'.
;
; @param 1      The instruction.
;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEM_FENCE lfence
IEMIMPL_MEM_FENCE sfence
IEMIMPL_MEM_FENCE mfence
1011
;;
; Memory fence alternative for hosts without SSE2.
;
; Uses an xchg with a scratch stack slot in place of a fence instruction;
; xAX holds its original value again on return.
;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        push    xAX                             ; scratch slot holding a copy of xAX
        xchg    xAX, [xSP]                      ; swap with that slot; xAX keeps its value
        add     xSP, xCB                        ; discard the slot
        ret
ENDPROC iemAImpl_alt_mem_fence
1021
1022
1023
;;
; Generator for shift and rotate workers.
;
; Produces the 8, 16 and 32-bit workers and, on AMD64 only, the 64-bit one.
; 64-bit accesses on 32-bit systems require hand coding.
;
; Every worker receives a pointer to the destination memory operand in A0,
; the shift count in A1 and a pointer to eflags in A2.
;
; The count has to be in cl.  With ASM_CALL64_GCC it is copied there from A1;
; otherwise A0 and A1 are swapped, which puts the count in cl (A0 is rcx/ecx
; in those conventions) and the destination pointer in A1.
;
; @param 1      The instruction mnemonic.
; @param 2      The modified flags.
; @param 3      The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      byte [A0], cl
 %else
        xchg    A1, A0
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

;                instr, modified-flags, undefined-flags
IEMIMPL_SHIFT_OP rol,   (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror,   (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl,   (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr,   (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1108
1109
;;
; Macro for implementing a double precision shift operation.
;
; Code is generated for the 16, 32 and 64 bit operand sizes.  The 64-bit
; variant is only emitted on 64-bit hosts; 32-bit hosts need a hand coded
; version.
;
; Each function takes the destination operand (r/m) pointer in A0, the source
; (reg) value in A1, the shift count in A2 and a pointer to the eflags
; variable/register in A3.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE

;
; 16-bit operands.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; Count into A3 (assumed to be rCX) for the CL operand.
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; Put the eflags pointer back into A3.
 %else
        xchg    A0, A2                  ; Count into A0 (assumed to be xCX), pointer into A2.
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;
; 32-bit operands.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; Count into A3 (assumed to be rCX) for the CL operand.
        %1      [A0], A1_32, cl
        xchg    A3, A2                  ; Put the eflags pointer back into A3.
 %else
        xchg    A0, A2                  ; Count into A0 (assumed to be xCX), pointer into A2.
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

;
; 64-bit operands, 64-bit hosts only.
;
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; Count into A3 (assumed to be rCX) for the CL operand.
        %1      [A0], A1, cl
        xchg    A3, A2                  ; Put the eflags pointer back into A3.
  %else
        xchg    A0, A2                  ; Count into A0 (assumed to be xCX), pointer into A2.
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1178
1179
;;
; Macro for implementing a multiplication operation.
;
; Code is generated for the 8, 16, 32 and 64 bit operand sizes.  The 64-bit
; variant is only emitted on 64-bit hosts; 32-bit hosts need a hand coded
; version.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 is the
; pointer to AX, A1 the operand and A2 the pointer to eflags.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the operand
; in A2 and a pointer to eflags in A3.
;
; All functions return 0, which gives them the same return convention as the
; div/idiv workers so a caller can treat both families alike.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE

;
; AX = AL * operand.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; The product occupies all of AX.
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; Return 0.
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

;
; DX:AX = AX * operand.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; Keep the rDX pointer in T1 across the multiply.
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; Return 0.
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;
; EDX:EAX = EAX * operand.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; Keep the rDX pointer in T1 across the multiply.
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; Return 0.
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

;
; RDX:RAX = RAX * operand.
;
 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; Keep the rDX pointer in T1 across the multiply.
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; Return 0.
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1275
1276
BEGINCODE
;;
; Worker function for negating the 64-bit value held in the T1:T0 register
; pair, i.e. T1_32 is the high dword and T0_32 the low dword.
;
; Uses the neg/adc/neg sequence instead of spilling to the stack: no memory
; is touched and no implicitly locked xchg with memory is executed.
;
; @uses     T0, T1 and EFLAGS (the flags on return are not meaningful).
;
iemAImpl_negate_T0_T1_u32:
        neg     T0_32                   ; low  = -low; CF is set unless low was zero.
        adc     T1_32, 0                ; high += borrow from the low half.
        neg     T1_32                   ; high = -(high + borrow).
        ret
1290
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating the 128-bit value held in the T1:T0 register
; pair, i.e. T1 is the high qword and T0 the low qword.
;
; Uses the neg/adc/neg sequence instead of spilling to the stack: no memory
; is touched and no implicitly locked xchg with memory is executed.
;
; @uses     T0, T1 and EFLAGS (the flags on return are not meaningful).
;
iemAImpl_negate_T0_T1_u64:
        neg     T0                      ; low  = -low; CF is set unless low was zero.
        adc     T1, 0                   ; high += borrow from the low half.
        neg     T1                      ; high = -(high + borrow).
        ret
%endif
1305
1306
;;
; Macro for implementing a division operation.
;
; Code is generated for the 8, 16, 32 and 64 bit operand sizes.  The 64-bit
; variant is only emitted on 64-bit hosts; 32-bit hosts need a hand coded
; version.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 is the
; pointer to AX, A1 the divisor and A2 the pointer to eflags.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the divisor
; in A2 and a pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  The division instruction is never executed when -1
; is returned, so the host cannot take a divide error on behalf of the guest.
;
; Signed overflow check: both operands are converted to their magnitudes and
; the dividend is shifted right by (result width - 1) bits before being
; compared with the divisor.  A dividend equal to the most negative value has
; no positive counterpart and its magnitude would lose its top bit in that
; comparison; since it overflows for every possible divisor it is rejected
; right after the negation.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; AH >= divisor means the quotient won't fit in AL.
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
        jo      .div_overflow           ; Dividend was 0x8000: overflows for any divisor.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
        jo      .div_overflow           ; Dividend was 0x8000: overflows for any divisor.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; DX >= divisor means the quotient won't fit in AX.
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend (DX:AX)
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
        jo      .div_overflow           ; Dividend was 0x80000000: overflows for any divisor.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
        jo      .div_overflow           ; Dividend was 0x80000000: overflows for any divisor.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Move the divisor out of A2 before DX is loaded.
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; Move the rDX pointer out of A1 before DX is loaded.
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; EDX >= divisor means the quotient won't fit in EAX.
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u32
        test    T1_32, T1_32            ; Still negative after negation: the dividend was the
        js      .div_overflow           ; most negative value, which overflows for any divisor.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u32
        test    T1_32, T1_32            ; Still negative after negation: the dividend was the
        js      .div_overflow           ; most negative value, which overflows for any divisor.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Move the divisor out of A2 before EDX is loaded.
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; Move the rDX pointer out of A1 before EDX is loaded.
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; Drop the divisor saved by the signed overflow check.
 %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero

        ; Overflow check, see the smaller variants.
  %if %4 == 0
        cmp     [A1], A2                ; RDX >= divisor means the quotient won't fit in RAX.
        jae     .div_overflow
  %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u64
        test    T1, T1                  ; Still negative after negation: the dividend was the
        js      .div_overflow           ; most negative value, which overflows for any divisor.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u64
        test    T1, T1                  ; Still negative after negation: the dividend was the
        js      .div_overflow           ; most negative value, which overflows for any divisor.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Move the divisor out of A2 before RDX is loaded.
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; Move the rDX pointer out of A1 before RDX is loaded.
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
  %if %4 != 0
        pop     A2                      ; Drop the divisor saved by the signed overflow check.
  %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1631
1632
;
; BSWAP. No flag changes.
;
; Each function takes a single argument in A0: a pointer to the value to
; byte swap, which is updated in place.  Nothing is returned.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; Load a full dword, just in case any of the upper bits are used.
        db      66h                     ; Hand coded operand size prefix: run BSWAP in its 16-bit form.
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1647
; Byte swaps the 32-bit value A0 points to, in place.
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1655
; Byte swaps the 64-bit value A0 points to, in place.
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        ; 32-bit host: swap the bytes of each half, then store the halves crosswise.
        mov     T0, [A0]
        mov     T1, [A0 + 4]
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0
        mov     [A0], T1
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1674
1675
;;
; Initialize the FPU for the actual instruction being emulated, this means
; loading parts of the guest's control word and status word.
;
; The host FPU environment is stored at [xSP], patched with the guest values
; and loaded back, so the caller must have reserved stack space for it.
;
; @uses     T0, T1, EFLAGS and the FNSTENV image at [xSP] (28 bytes in the
;           32-bit protected mode layout that X86FSTENV32P presumably describes).
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - the guest's C0, C1, C2 and C3 (for instructions leaving them
        ; undefined) merged with the TOP field of the host image.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1701
1702
;;
; Output of an FPU worker producing one 80-bit result.
; @todo Find a better home for this definition.
;
struc IEMFPURESULT
    .r80Result  resw 5                  ; The 80-bit result (5 words).
    .FSW        resw 1                  ; The FPU status word after the operation.
endstruc
1710
1711
;;
; Output of an FPU worker producing two 80-bit results.
; @todo Find a better home for this definition.
;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; The first 80-bit result (5 words).
    .FSW        resw 1                  ; The FPU status word after the operation.
    .r80Result2 resw 5                  ; The second 80-bit result (5 words).
endstruc
1720
1721
1722;
1723;---------------------- 16-bit signed integer operations ----------------------
1724;
1725
1726
;;
; Converts a 16-bit signed integer to a 80-bit floating point value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1750
1751
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1775
1776
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; Must be the word form: the destination is only 16 bits
                                        ; wide and the invalid-operand range depends on the size.

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1801
1802
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value.
;
; The 80-bit value is loaded as ST0 and the instruction is executed with the
; 16-bit integer as its memory operand.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1839
1840
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1871
1872
1873
1874;
1875;---------------------- 32-bit signed integer operations ----------------------
1876;
1877
1878
;;
; Converts a 32-bit signed integer to a 80-bit floating point value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1902
1903
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1927
1928
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1953
1954
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value.
;
; The 80-bit value is loaded as ST0 and the instruction is executed with the
; 32-bit integer as its memory operand.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1991
1992
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2023
2024
2025
2026;
2027;---------------------- 64-bit signed integer operations ----------------------
2028;
2029
2030
;;
; Converts a 64-bit signed integer to a 80-bit floating point value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2054
2055
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2079
2080
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2105
2106
2107
2108;
2109;---------------------- 32-bit floating point operations ----------------------
2110;
2111
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2135
2136
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; Non-popping store; the fninit below resets the stack.

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2160
2161
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value.
;
; The 80-bit value is loaded as ST0 and the instruction is executed with the
; 32-bit value as its memory operand.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2198
2199
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2230
2231
2232
2233;
2234;---------------------- 64-bit floating point operations ----------------------
2235;
2236
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a clean FPU state like the other load
                                        ; workers; otherwise the load runs on whatever register
                                        ; stack and status the host happens to have.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2259
2260
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; Non-popping store; the fninit below resets the stack.

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2284
2285
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value.
;
; The 80-bit value is loaded as ST0 and the instruction is executed with the
; 64-bit value as its memory operand.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2322
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2353
2354
2355
2356;
2357;---------------------- 80-bit floating point operations ----------------------
2358;
2359
;;
; Loads a 80-bit floating point register value from memory.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear the exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2383
2384
;;
; Store a 80-bit floating point register to memory
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2408
2409
;;
; Template for an FPU instruction working on two 80-bit floating point values.
; Generates iemAImpl_<insn>_r80_by_r80.
;
; The value at A3 is pushed first and the value at A2 second, so at the time
; the instruction executes ST0 = [A2] and ST1 = [A3].  The value left in ST0
; is returned as the result.
;
; @param    1       The instruction mnemonic.
; @param    2       The operand list to emit after the mnemonic; pass {} for
;                   instructions with implicit operands.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        fld     tword [A3]              ; becomes ST1 after the next push.
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        ; Capture FSW first, then clear exceptions before storing ST0.
        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

; Explicit two-register forms.
IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
; Implicit operands.
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2450
2451
;;
; Template for an FPU instruction working on two 80-bit floating point values,
; ST1 and ST0, storing the result in ST1 and popping the stack.
; Generates iemAImpl_<insn>_r80_by_r80.
;
; The value at A2 is pushed first and the value at A3 second, so at the time
; the instruction executes ST1 = [A2] and ST0 = [A3].  After the instruction's
; pop the result is the new top of stack, which is what gets stored.
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        fld     tword [A2]              ; becomes ST1 after the next push.
        fld     tword [A3]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; Capture FSW first, then clear exceptions before storing the result.
        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2486
2487
;;
; Template for an FPU instruction working on two 80-bit floating point values
; and only returning FSW.  Generates iemAImpl_<insn>_r80_by_r80.
;
; The instruction is emitted as "<insn> st0, st1" with ST0 = [A2] and
; ST1 = [A3].
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        fld     tword [A3]              ; becomes ST1 after the next push.
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word  [A1]

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2520
2521
;;
; Template for an FPU instruction working on two 80-bit floating point values
; and returning both FSW and EFLAGS (the latter in xAX).
; Generates iemAImpl_<insn>_r80_by_r80.
;
; The instruction is emitted as "<insn> st1" with ST0 = [A2] and ST1 = [A3].
;
; @param    1       The instruction mnemonic.
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        fld     tword [A3]              ; becomes ST1 after the next push.
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        ; Store FSW, then fetch the flags the instruction produced into the
        ; return register by way of the stack.
        fnstsw  word  [A1]
        pushf
        pop     xAX

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2557
2558
;;
; Template for an FPU instruction working on a single 80-bit floating point
; value.  Generates iemAImpl_<insn>_r80.
;
; The value at A2 is pushed as ST0, the instruction is executed without
; explicit operands, and the resulting ST0 is returned.
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; Capture FSW first, then clear exceptions before storing ST0.
        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
; NOTE(review): fyl2x architecturally operates on ST1 and ST0, but this
; template only pushes one value - confirm this instance is intended.
IEMIMPL_FPU_R80 fyl2x
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2596
2597
;;
; Template for an FPU instruction working on a single 80-bit floating point
; value and only returning FSW.  Generates iemAImpl_<insn>_r80.
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1]

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2628
2629
2630
;;
; Template for an FPU instruction loading an 80-bit floating point constant.
; Generates iemAImpl_<insn>.
;
; The constant is pushed under the guest control/status state and then
; returned, together with the resulting FSW, in an IEMFPURESULT structure.
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                ; 32 bytes of stack; released again before the epilogue.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; Capture FSW first, then clear exceptions before storing the constant.
        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_2_ARGS
        ; Same name expression as on the BEGINPROC_FASTCALL line; the stray
        ; trailing '%+' (concatenation with nothing) has been dropped.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2665
2666
2667;;
2668; FPU instruction working on one 80-bit floating point value, outputing two.
2669;
2670; @param 1 The instruction
2671;
2672; @param A0 FPU context (fxsave).
2673; @param A1 Pointer to a IEMFPURESULTTWO for the output.
2674; @param A2 Pointer to the 80-bit value.
2675;
2676%macro IEMIMPL_FPU_R80_R80 1
2677BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2678 PROLOGUE_3_ARGS
2679 sub xSP, 20h
2680
2681 fninit
2682 fld tword [A2]
2683 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2684 %1
2685
2686 fnstsw word [A1 + IEMFPURESULTTWO.FSW]
2687 fnclex
2688 fstp tword [A1 + IEMFPURESULTTWO.r80Result2]
2689 fnclex
2690 fstp tword [A1 + IEMFPURESULTTWO.r80Result1]
2691
2692 fninit
2693 add xSP, 20h
2694 EPILOGUE_3_ARGS
2695ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2696%endmacro
2697
2698IEMIMPL_FPU_R80_R80 fptan
2699IEMIMPL_FPU_R80_R80 fxtract
2700IEMIMPL_FPU_R80_R80 fsincos
2701
2702
2703
2704
2705;---------------------- SSE and MMX Operations ----------------------
2706
;;
; Hooks bracketing the MMX workers below.  Both are currently empty.
; @todo what do we need to do for MMX?
;
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro

%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;;
; Hooks bracketing the SSE workers below.  Both are currently empty.
; @todo what do we need to do for SSE?
;
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro

%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro
2718
2719
;;
; Template for a media instruction working on two full sized registers.
; Generates an MMX worker (iemAImpl_<insn>_u64) and an SSE worker
; (iemAImpl_<insn>_u128).
;
; Both operands are loaded in full, the instruction is executed register to
; register, and the first operand is written back.
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first media register size operand (input/output).
; @param    A2      Pointer to the second media register size operand (input).
;
%macro IEMIMPL_MEDIA_F2 1
; 64-bit (MMX) variant.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]
        movq    mm1, [A2]
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _u64

; 128-bit (SSE) variant; unaligned moves, so no alignment is required of A1/A2.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F2 pxor
IEMIMPL_MEDIA_F2 pcmpeqb
IEMIMPL_MEDIA_F2 pcmpeqw
IEMIMPL_MEDIA_F2 pcmpeqd
2761
2762
;;
; Template for a media instruction working on one full sized and one half
; sized register (lower half).  Generates iemAImpl_<insn>_u128 and, when
; requested, iemAImpl_<insn>_u64.
;
; Only half a register's worth of data is read from A2: 32 bits (movd) for the
; MMX worker and 64 bits (movq) for the SSE worker.
;
; @param    1       The instruction mnemonic.
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second half sized media register operand (input).
;
%macro IEMIMPL_MEDIA_F1L1 2
 %if %2 != 0
; 64-bit (MMX) variant.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]
        movd    mm1, [A2]               ; low 32 bits only.
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _u64
 %endif

; 128-bit (SSE) variant.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]
        movq    xmm1, [A2]              ; low 64 bits only.
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F1L1 punpcklbw,  1
IEMIMPL_MEDIA_F1L1 punpcklwd,  1
IEMIMPL_MEDIA_F1L1 punpckldq,  1
IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2807
2808
;;
; Template for a media instruction working on one full sized and one half
; sized register (high half).  Generates iemAImpl_<insn>_u128 and, when
; requested, iemAImpl_<insn>_u64.
;
; Because the instruction consumes the *upper* half of the second operand, the
; whole register must be loaded from A2 (movq for MMX, movdqu for SSE); a half
; sized load would leave the half that is actually used zeroed.
;
; @param    1       The instruction mnemonic.
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second full sized media register operand, where we
;                   will only use the upper half (input).
;
%macro IEMIMPL_MEDIA_F1H1 2
 %if %2 != 0
; 64-bit (MMX) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]
        movq    mm1, [A2]               ; full register, the high half is what counts.
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

; 128-bit (SSE) variant.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]              ; full register, the high half is what counts.
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

; The high-half unpack instructions must use the F1H1 template: the low-half
; template (F1L1) only loads the lower half of the source operand.
IEMIMPL_MEDIA_F1H1 punpckhbw,  1
IEMIMPL_MEDIA_F1H1 punpckhwd,  1
IEMIMPL_MEDIA_F1H1 punpckhdq,  1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2854
2855
;
; Shufflers with evil 8-bit immediates.
;

;;
; PSHUFW worker.
;
; The shuffle control is an instruction immediate, so it cannot be supplied
; from a register.  Instead a table of 256 "pshufw mm0, mm1, imm / ret" stubs,
; one per immediate value, is emitted after the function and the matching stub
; is called by computed address.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the 64-bit destination operand (input/output).
; @param    A2      Pointer to the 64-bit source operand (input).
; @param    A3      The 8-bit immediate (shuffle control).
;
BEGINPROC_FASTCALL iemAImpl_pshufw, 16
        PROLOGUE_4_ARGS
        IEMIMPL_MMX_PROLOGUE

        ; The immediate is only 8 bits wide and the calling conventions do not
        ; guarantee the remaining register bits are zero, so clear them before
        ; using A3 as a table index (flags are not live here).
        and     A3, 0ffh

        movq    mm0, [A1]
        movq    mm1, [A2]
        lea     T0, [A3 + A3*4]         ; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]
        lea     T1, [T1 + T0]           ; T1 = &.imm0 + bImm * 5
        call    T1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_4_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                ; 256*5 == 0x500
; Assembly-time size check: either word below overflows 16 bits unless the
; table is exactly 0x500 bytes.
dw 0xfaff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw
2885
2886
;;
; Template for the SSE shuffle instructions taking an 8-bit immediate.
; Generates iemAImpl_<insn>.
;
; As the shuffle control is an instruction immediate, a table of 256
; "<insn> xmm0, xmm1, imm / ret" stubs, one per immediate value, is emitted
; after the function and the matching stub is called by computed address.
;
; @param    1       The instruction mnemonic.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the 128-bit destination operand (input/output).
; @param    A2      Pointer to the 128-bit source operand (input).
; @param    A3      The 8-bit immediate (shuffle control).
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
        PROLOGUE_4_ARGS
        IEMIMPL_SSE_PROLOGUE

        ; The immediate is only 8 bits wide and the calling conventions do not
        ; guarantee the remaining register bits are zero, so clear them before
        ; using A3 as a table index (flags are not live here).
        and     A3, 0ffh

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        lea     T1, [.imm0 xWrtRIP]
        lea     T0, [A3 + A3*2]         ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
        lea     T1, [T1 + T0*2]         ; T1 = &.imm0 + bImm * 6
        call    T1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_4_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
; Assembly-time size check: either word below overflows 16 bits unless the
; table is exactly 0x600 bytes.
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
2918
2919
;
; Move byte mask.
;

;;
; PMOVMSKB worker for a 64-bit (MMX) source.
;
; The mask is produced in a general purpose register and written to the
; destination as a natural-width value; on 32-bit hosts the upper dword of the
; destination is zeroed separately.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the 64-bit destination (output).
; @param    A2      Pointer to the 64-bit source operand (input).
;
BEGINPROC_FASTCALL  iemAImpl_pmovmskb_u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        mov     T0, [A1]
        movq    mm1, [A2]
        pmovmskb T0, mm1
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 only covers the low dword on 32-bit hosts.
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_pmovmskb_u64
2938
;;
; PMOVMSKB worker for a 128-bit (SSE) source.
;
; The mask is produced in a general purpose register and written to the
; destination as a natural-width value; on 32-bit hosts the upper dword of the
; destination is zeroed separately.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the 64-bit destination (output).
; @param    A2      Pointer to the 128-bit source operand (input).
;
BEGINPROC_FASTCALL  iemAImpl_pmovmskb_u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        mov     T0, [A1]
        movdqu  xmm1, [A2]              ; unaligned load, A2 need not be 16-byte aligned.
        pmovmskb T0, xmm1
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 only covers the low dword on 32-bit hosts.
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC             iemAImpl_pmovmskb_u128
2953
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette