VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 46995

Last change on this file since 46995 was 46995, checked in by vboxsync, 11 years ago

build fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 70.4 KB
Line 
1; $Id: IEMAllAImpl.asm 46995 2013-07-05 09:34:17Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2012 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.virtualbox.org. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;
; Return from a fastcall function.
;
; Only 32-bit Windows builds emit 'ret %1'; every other configuration emits
; a plain 'ret' and ignores the argument.
;
; @param        1       The immediate to put on the 'ret' for 32-bit Windows.
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %elifndef RT_OS_WINDOWS
        ret
 %else
        ret     %1
 %endif
%endmacro
44
;;
; Symbol name of a fastcall function.
;
; 32-bit Windows builds get a_Prefix, a_Name, '@' and a_cbArgs pasted together;
; all other builds get NAME(a_Name) and ignore the two extra arguments.
;
; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;       escaping (or whatever the dollar is good for here).  Thus the ugly
;       prefix argument.
;
%ifndef RT_ARCH_X86
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%elifndef RT_OS_WINDOWS
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%else
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
%endif
59
;;
; Opens a fastcall function: emits the export/global directives appropriate
; for the object format and then the function's label.
;
; The directives pass '$@' as the NAME_FASTCALL prefix while the label itself
; passes a bare '@' (see the yasm @todo on NAME_FASTCALL).
;
; @param        1       The function name (C).
; @param        2       The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
        export  %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 %ifdef ASM_FORMAT_OMF
  %ifdef __NASM__
        export  NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
        global  NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
80
81
82;
83; We employ some macro assembly here to hide the calling convention differences.
84;
85%ifdef RT_ARCH_AMD64
; AMD64 prologues/epilogues.  The prologues are empty and every epilogue is a
; plain 'ret'.  The _EX variants accept the stack clean-up size used by the
; 32-bit versions and ignore it, so one call site assembles for both
; architectures.
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Takes 0-1 parameters: the optional one matches the 32-bit macro of the same
 ; name, while the parameterless form this macro used to require still works.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro
121
; AMD64 register aliases.  A0..A3 are the argument registers for whichever
; 64-bit calling convention is selected (ASM_CALL64_GCC / ASM_CALL64_MSC);
; T0 and T1 are the temporaries used by the flag helper macros.
; The _32/_16/_8 suffixes name the narrower views of the same register.
; Note: there is no A3_8 alias.
 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b
173
174%else
; x86 (32-bit) prologues/epilogues.
;
; The prologues save edi (aliased as T1) and, for three and four arguments,
; ebx and esi (aliased as A2 and A3) before loading those two from the stack.
; The epilogues restore the saved registers and return with 'ret <imm>'.
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        EPILOGUE_1_ARGS_EX 0
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        EPILOGUE_2_ARGS_EX 0
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx
        push    edi
        mov     ebx, [esp + 8 + 4]      ; two saved registers + return address -> 1st stack argument.
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 16]         ; three saved registers + return address -> 1st stack argument.
        mov     esi, [esp + 20]         ; 2nd stack argument.
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro
236
; x86 (32-bit) register aliases.  A0/A1 are ecx/edx; A2/A3 are ebx/esi, which
; PROLOGUE_3_ARGS / PROLOGUE_4_ARGS fill from the stack.  T0/T1 are eax/edi.
; Note: there are no A3_8 and T1_8 aliases here.
 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
264%endif
265
266
;;
; Loads the flags selected by (%2 | %3) from the dword at [%1] into EFLAGS,
; leaving all other EFLAGS bits at their current value.
;
; The load is currently unconditional: the '%if (%3) != 0' guard is commented
; out.
;
; @remarks      Clobbers T0, stack.  Changes EFLAGS.
; @param        1       The register pointing to the eflags.
; @param        2       The set of modified flags.
; @param        3       The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
        ;%if (%3) != 0
        mov     T0_32, [%1]             ; fetch the guest flags (mov does not touch EFLAGS).
        pushf                           ; put the current flags on the stack.
        and     T0_32, (%2 | %3)        ; guest value: keep just the selected flags.
        and     dword [xSP], ~(%2 | %3) ; stacked value: drop the selected flags.
        or      [xSP], T0               ; merge the two.
        popf                            ; load the merged flags.
        ;%endif
%endmacro
286
;;
; Stores the flags selected by (%2 | %3) from EFLAGS into the dword at [%1],
; leaving the other bits of that dword unchanged.  Expands to nothing when
; the combined mask is zero.
;
; @remarks      Clobbers T0, T1, stack.
; @param        1       The register pointing to the EFLAGS.
; @param        2       The mask of modified flags to save.
; @param        3       The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        mov     T0_32, [%1]             ; stored flags (mov does not touch EFLAGS).
        pushf
        pop     T1                      ; T1 = current EFLAGS.
        and     T1_32, (%2 | %3)        ; current: keep just the selected flags.
        and     T0_32, ~(%2 | %3)       ; stored: clear the selected flags.
        or      T0_32, T1_32            ; combine.
        mov     [%1], T0_32             ; write back.
 %endif
%endmacro
306
307
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (int3 stubs are emitted there instead).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8                       ; same clean-up as the locked stub below (was a bare 'ret').
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro
406
; Instantiate the binary operators.
;              instr, lock, modified-flags, undefined-flags.
; (Exactly four arguments each; the stray trailing commas that used to follow
; X86_EFL_AF have been dropped.)
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
417
418
;;
; Generator for the bit test operators (bt, btc, bts, btr).
;
; Emits iemAImpl_<instr>_u16/_u32/_u64 and, on request, the matching _locked
; functions.  On 32-bit hosts the 64-bit functions are int3 stubs.
;
; Each function receives a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: placeholder until a hand coded version exists.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

 %if %2 != 0 ; emit the lock-prefixed variants too.

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifndef RT_ARCH_AMD64
        ; 32-bit host: placeholder until a hand coded version exists.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif
 %endif ; locked
%endmacro
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
505
;;
; Generator for the bit search operators (bsf, bsr).
;
; This is the three-parameter form of IEMIMPL_BIT_OP; the four-parameter form
; is a separate macro selected by argument count.
;
; Emits iemAImpl_<instr>_u16/_u32/_u64 (no locked variants).  On 32-bit hosts
; the 64-bit function is an int3 stub.
;
; Each function receives a pointer to the destination operand in A0, the
; source operand value in A1 and a pointer to eflags in A2.  The destination
; is only written when the instruction leaves ZF clear.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .skip_store             ; ZF set: leave the destination alone.
        mov     [A0], T0_16
.skip_store:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .skip_store             ; ZF set: leave the destination alone.
        mov     [A0], T0_32
.skip_store:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: placeholder until a hand coded version exists.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .skip_store             ; ZF set: leave the destination alone.
        mov     [A0], T0
.skip_store:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif
%endmacro
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
563
564
;
; Two operand IMUL (no lock, no memory destination in the instruction itself).
; The rDX:rAX variant of imul is handled together with mul further down.
;
; Each function takes a pointer to the first operand / result in A0, the
; second operand value in A1 and a pointer to eflags in A2.  The product is
; computed in the A1 register and then stored to [A0].
;
; The 64-bit function is an int3 stub on 32-bit hosts.
;
%define IEM_IMUL_TWO_EFL_MODIFIED   (X86_EFL_OF | X86_EFL_CF)
%define IEM_IMUL_TWO_EFL_UNDEFINED  (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)

BEGINCODE
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        imul    A1_16, word [A0]
        mov     [A0], A1_16             ; mov keeps the flags produced by imul.
        IEM_SAVE_FLAGS       A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16

BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        imul    A1_32, dword [A0]
        mov     [A0], A1_32
        IEM_SAVE_FLAGS       A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32

BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
%ifndef RT_ARCH_AMD64
        int3 ;; @todo implement me
%else
        IEM_MAYBE_LOAD_FLAGS A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        imul    A1, qword [A0]
        mov     [A0], A1
        IEM_SAVE_FLAGS       A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
%endif
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64

%undef IEM_IMUL_TWO_EFL_MODIFIED
%undef IEM_IMUL_TWO_EFL_UNDEFINED
600
601
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Each function takes two arguments, first the pointer to the memory,
; then the pointer to the register.  They all return void.
;
; The register value is read through A1 into T0, exchanged with [A0], and the
; previous memory value is written back through A1.  The 64-bit function is an
; int3 stub on 32-bit hosts.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]
        xchg    T0_8, [A0]
        mov     [A1], T0_8
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8

BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        xchg    T0_16, [A0]
        mov     [A1], T0_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16

BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        xchg    T0_32, [A0]
        mov     [A1], T0_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32

BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
%ifndef RT_ARCH_AMD64
        int3
        ret     0
%else
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        xchg    T0, [A0]
        mov     [A1], T0
        EPILOGUE_2_ARGS
%endif
ENDPROC iemAImpl_xchg_u64
645
646
;
; XADD for memory operands, plain and lock-prefixed.
;
; Each function takes three arguments, first the pointer to the
; memory/register, then the pointer to the register, and finally a pointer to
; eflags.  They all return void.
;
; The register value is read through A1 into T0, xadd'ed with [A0], and the
; value xadd leaves in T0 is written back through A1.  The 64-bit functions
; are int3 stubs on 32-bit hosts.
;
%define IEM_XADD_EFL_MODIFIED   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)

BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_8, [A1]
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8

BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_16, [A1]
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16

BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_32, [A1]
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32

BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
%ifndef RT_ARCH_AMD64
        int3
        ret     4
%else
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0, [A1]
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
%endif
ENDPROC iemAImpl_xadd_u64

BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
%ifndef RT_ARCH_AMD64
        int3
        ret     4
%else
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
%endif
ENDPROC iemAImpl_xadd_u64_locked

%undef IEM_XADD_EFL_MODIFIED
744
745
;
; CMPXCHG8B.
;
; The instruction hard-wires edx:eax (comparand / old value) and ecx:ebx
; (replacement), which collide with the argument registers, so there is one
; hand written body per calling convention.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
;                                             uint32_t *pEFlags));
;
; edx:eax is written back to *pu64EaxEdx unconditionally after the
; instruction.  Only ZF is exchanged with *pEFlags.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ; In: rcx = pu64Dst, rdx = pu64EaxEdx, r8 = pu64EbxEcx, r9 = pEFlags.
        push    rbx

        mov     r10, rcx                ; pu64Dst - rcx is needed for the instruction.
        mov     r11, rdx                ; pu64EaxEdx (is also T1) - rdx is needed too.

        mov     ebx, [r8]
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        mov     [r11], eax
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi = pu64Dst, rsi = pu64EaxEdx, rdx = pu64EbxEcx, rcx = pEFlags.
        push    rbx

        mov     r11, rdx                ; pu64EbxEcx (is also T1)
        mov     r10, rcx                ; pEFlags

        mov     ebx, [r11]
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ; In: ecx = pu64Dst, edx = pu64EaxEdx, stack = pu64EbxEcx, pEFlags.
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     esi, edx                ; pu64EaxEdx
        mov     edi, ecx                ; pu64Dst
        mov     ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b

BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg8b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
838
839
840
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; The accumulator operand is passed by pointer (A1): it is loaded into
; al/ax/eax/rax before the instruction and the register is written back
; through the same pointer afterwards.
;
; @param        1       Instruction prefix: empty or 'lock'.
; @param        2       Function name suffix: empty or '_locked'.
;
; 32-bit hosts: the 64-bit variant takes uReg by pointer, is built on
; cmpxchg8b and always uses the lock prefix regardless of parameter 1.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]               ; full 64-bit accumulator (was 'ax', i.e. only 16 bits).
        %1 cmpxchg [A0], A2
        mov     [A1], rax               ; full 64-bit write back.
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF clear means the compare failed; edx:eax then holds the destination
        ; value while [esi] still holds the caller's comparand.  (This branch
        ; used to be 'jz', which made a failed compare report ZF=1.)
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                ; equal: just set the other flags.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        ; Compare the comparand with the destination value, high dword first;
        ; the low dwords only decide when the high dwords match.
        cmp     [esi + 4], edx  ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG
935
936IEMIMPL_CMPXCHG , ,
937IEMIMPL_CMPXCHG lock, _locked
938
;;
; Generator for the unary operators (inc, dec, neg, not).
;
; Emits iemAImpl_<instr>_u8/_u16/_u32/_u64, each with a _locked twin.  On
; 32-bit hosts the 64-bit functions are int3 stubs.
;
; Each function receives a pointer to the destination memory operand in A0
; and a pointer to eflags in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: stub them for now.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif

%endmacro

IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1037
1038
1039
;;
; Generator for the shift and rotate operators.
;
; Emits iemAImpl_<instr>_u8/_u16/_u32/_u64.  On 32-bit hosts the 64-bit
; function is an int3 stub.
;
; Each function receives a pointer to the destination memory operand in A0,
; the shift count in A1 and a pointer to eflags in A2.
;
; The instruction takes its count in cl.  With ASM_CALL64_GCC the count is
; copied from A1 into cl; otherwise A0 and A1 are exchanged so that the count
; ends up in cl and the destination pointer in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifndef ASM_CALL64_GCC
        xchg    A1, A0                  ; count -> cl, destination pointer -> A1.
        %1      byte [A1], cl
 %else
        mov     cl, A1_8
        %1      byte [A0], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifndef ASM_CALL64_GCC
        xchg    A1, A0                  ; count -> cl, destination pointer -> A1.
        %1      word [A1], cl
 %else
        mov     cl, A1_8
        %1      word [A0], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifndef ASM_CALL64_GCC
        xchg    A1, A0                  ; count -> cl, destination pointer -> A1.
        %1      dword [A1], cl
 %else
        mov     cl, A1_8
        %1      dword [A0], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: placeholder until a hand coded version exists.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        int3
        ret     4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifndef ASM_CALL64_GCC
        xchg    A1, A0                  ; count -> cl, destination pointer -> A1.
        %1      qword [A1], cl
  %else
        mov     cl, A1_8
        %1      qword [A0], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

%endmacro

IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1129
1130
;;
; Emits the double precision shift workers iemAImpl_<mnemonic>_u16, _u32 and
; _u64.
;
; The workers take the destination operand (r/m) pointer in A0, the source
; (reg) value in A1, the shift count in A2 and a pointer to the eflags
; variable/register in A3.  The count must sit in CL when the instruction is
; issued, so the argument registers are shuffled around it.
;
; The 64-bit worker is only implemented on AMD64 hosts; on 32-bit hosts it is
; an int3 stub until a hand coded version exists.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE

; 16-bit operands.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
    PROLOGUE_4_ARGS
    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
    xchg    A3, A2                      ; relies on A3 being xCX: count -> CL
    %1      [A0], A1_16, cl
    xchg    A3, A2                      ; put the eflags pointer back into A3
  %else
    xchg    A0, A2                      ; relies on A0 being xCX: count -> CL, pointer -> A2
    %1      [A2], A1_16, cl
  %endif
    IEM_SAVE_FLAGS A3, %2, %3
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; 32-bit operands.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
    PROLOGUE_4_ARGS
    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
    xchg    A3, A2                      ; count -> CL
    %1      [A0], A1_32, cl
    xchg    A3, A2                      ; put the eflags pointer back into A3
  %else
    xchg    A0, A2                      ; count -> CL, pointer -> A2
    %1      [A2], A1_32, cl
  %endif
    IEM_SAVE_FLAGS A3, %2, %3
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

; 64-bit operands.
  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
    PROLOGUE_4_ARGS
    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
    %ifdef ASM_CALL64_GCC
    xchg    A3, A2                      ; count -> CL
    %1      [A0], A1, cl
    xchg    A3, A2                      ; put the eflags pointer back into A3
    %else
    xchg    A0, A2                      ; count -> CL, pointer -> A2
    %1      [A2], A1, cl
    %endif
    IEM_SAVE_FLAGS A3, %2, %3
    EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
    int3
    ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
  %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1204
1205
;;
; Emits the multiplication workers iemAImpl_<mnemonic>_u8, _u16, _u32 and _u64.
;
; The 8-bit worker only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the operand and A2 points to eflags.  The wider workers take
; a pointer to rAX in A0, a pointer to rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; All workers return 0 in eax, which lets callers treat them like the div/idiv
; workers that report errors through the return value.
;
; The 64-bit worker is only implemented on AMD64 hosts; on 32-bit hosts it is
; an int3 stub until a hand coded version exists.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE

; 8-bit: AL * operand -> AX.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
    PROLOGUE_3_ARGS
    IEM_MAYBE_LOAD_FLAGS A2, %2, %3
    mov     al, [A0]
    %1      A1_8
    mov     [A0], ax                    ; the full 16-bit product goes back
    IEM_SAVE_FLAGS A2, %2, %3
    xor     eax, eax                    ; return 0
    EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

; 16-bit: AX * operand -> DX:AX.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
    PROLOGUE_4_ARGS
    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
    mov     ax, [A0]
  %ifdef ASM_CALL64_GCC
    %1      A2_16
    mov     [A0], ax
    mov     [A1], dx
  %else
    mov     T1, A1                      ; keep the rDX pointer; presumably A1 is xDX here
    %1      A2_16
    mov     [A0], ax
    mov     [T1], dx
  %endif
    IEM_SAVE_FLAGS A3, %2, %3
    xor     eax, eax                    ; return 0
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; 32-bit: EAX * operand -> EDX:EAX.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
    PROLOGUE_4_ARGS
    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
    mov     eax, [A0]
  %ifdef ASM_CALL64_GCC
    %1      A2_32
    mov     [A0], eax
    mov     [A1], edx
  %else
    mov     T1, A1                      ; keep the rDX pointer across the multiply
    %1      A2_32
    mov     [A0], eax
    mov     [T1], edx
  %endif
    IEM_SAVE_FLAGS A3, %2, %3
    xor     eax, eax                    ; return 0
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

; 64-bit: RAX * operand -> RDX:RAX.
  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
    PROLOGUE_4_ARGS
    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
    mov     rax, [A0]
    %ifdef ASM_CALL64_GCC
    %1      A2
    mov     [A0], rax
    mov     [A1], rdx
    %else
    mov     T1, A1                      ; keep the rDX pointer across the multiply
    %1      A2
    mov     [A0], rax
    mov     [T1], rdx
    %endif
    IEM_SAVE_FLAGS A3, %2, %3
    xor     eax, eax                    ; return 0
    EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
    int3
    ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
  %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1306
1307
;;
; Emits the division workers iemAImpl_<mnemonic>_u8, _u16, _u32 and _u64.
;
; The 8-bit worker only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the divisor and A2 points to eflags.  The wider workers take
; a pointer to rAX in A0, a pointer to rDX in A1, the divisor in A2 and a
; pointer to eflags in A3.
;
; All workers return 0 on success and -1 if a divide error should be raised
; by the caller.  A zero divisor is always caught here.  Quotient overflow is
; caught for the unsigned variants and for the signed 8-bit variant only; the
; signed 16/32/64-bit variants still lack the check (see the @todo markers).
;
; The 64-bit worker is only implemented on AMD64 hosts; on 32-bit hosts it is
; an int3 stub until a hand coded version exists.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
    PROLOGUE_3_ARGS

    ; div by chainsaw check.
    test    A1_8, A1_8
    jz      .div_zero

    ; Overflow check - unsigned division is simple to verify, haven't
    ; found a simple way to check signed division yet unfortunately.
  %if %4 == 0
    cmp     [A0 + 1], A1_8              ; high byte of the dividend must be below the divisor
    jae     .div_overflow
  %else
    mov     T0_16, [A0]                 ; T0 = dividend
    mov     T1, A1                      ; T1 = saved divisor (because of missing T1_8 in 32-bit)
    test    A1_8, A1_8
    js      .divisor_negative
    test    T0_16, T0_16
    jns     .both_positive
    neg     T0_16
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
    push    T0                          ; Start off like unsigned below.
    shr     T0_16, 7
    cmp     T0_8, A1_8
    pop     T0                          ; pop leaves the flags of the cmp intact
    jb      .div_no_overflow
    ja      .div_overflow
    and     T0_8, 0x7f                  ; Special case for covering (divisor - 1).
    cmp     T0_8, A1_8
    jae     .div_overflow
    jmp     .div_no_overflow

.divisor_negative:
    neg     A1_8                        ; work on the magnitude; undone at .div_no_overflow
    test    T0_16, T0_16
    jns     .one_of_each
    neg     T0_16
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
    shr     T0_16, 7
    cmp     T0_8, A1_8
    jae     .div_overflow
.div_no_overflow:
    mov     A1, T1                      ; restore divisor
  %endif

    IEM_MAYBE_LOAD_FLAGS A2, %2, %3
    mov     ax, [A0]
    %1      A1_8
    mov     [A0], ax
    IEM_SAVE_FLAGS A2, %2, %3
    xor     eax, eax                    ; return 0 (success)

.return:
    EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
    mov     eax, -1                     ; tell the caller to raise a divide error
    jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
    PROLOGUE_4_ARGS

    test    A2_16, A2_16
    jz      .div_zero
  %if %4 == 0
    cmp     [A1], A2_16                 ; DX must be below the divisor
    jae     .div_overflow
  %else
    ;; @todo idiv overflow checking.
  %endif

    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
    mov     T1, A2                      ; park the divisor; presumably A2 is xDX, loaded next
    mov     ax, [A0]
    mov     dx, [A1]
    %1      T1_16
    mov     [A0], ax
    mov     [A1], dx
  %else
    mov     T1, A1                      ; park the rDX pointer; presumably A1 is xDX, loaded next
    mov     ax, [A0]
    mov     dx, [T1]
    %1      A2_16
    mov     [A0], ax
    mov     [T1], dx
  %endif
    IEM_SAVE_FLAGS A3, %2, %3
    xor     eax, eax                    ; return 0 (success)

.return:
    EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
    mov     eax, -1                     ; tell the caller to raise a divide error
    jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
    PROLOGUE_4_ARGS

    test    A2_32, A2_32
    jz      .div_zero
  %if %4 == 0
    cmp     [A1], A2_32                 ; EDX must be below the divisor
    jae     .div_overflow
  %else
    ;; @todo idiv overflow checking.
  %endif

    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
    mov     T1, A2                      ; park the divisor before EDX is loaded
    mov     eax, [A0]
    mov     edx, [A1]
    %1      T1_32
    mov     [A0], eax
    mov     [A1], edx
  %else
    mov     T1, A1                      ; park the rDX pointer before EDX is loaded
    mov     eax, [A0]
    mov     edx, [T1]
    %1      A2_32
    mov     [A0], eax
    mov     [T1], edx
  %endif
    IEM_SAVE_FLAGS A3, %2, %3
    xor     eax, eax                    ; return 0 (success)

.return:
    EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
    mov     eax, -1                     ; tell the caller to raise a divide error
    jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
    PROLOGUE_4_ARGS

    test    A2, A2
    jz      .div_zero
    %if %4 == 0
    cmp     [A1], A2                    ; RDX must be below the divisor
    jae     .div_overflow
    %else
    ;; @todo idiv overflow checking.
    %endif

    IEM_MAYBE_LOAD_FLAGS A3, %2, %3
    %ifdef ASM_CALL64_GCC
    mov     T1, A2                      ; park the divisor before RDX is loaded
    mov     rax, [A0]
    mov     rdx, [A1]
    %1      T1
    mov     [A0], rax
    mov     [A1], rdx
    %else
    mov     T1, A1                      ; park the rDX pointer before RDX is loaded
    mov     rax, [A0]
    mov     rdx, [T1]
    %1      A2
    mov     [A0], rax
    mov     [T1], rdx
    %endif
    IEM_SAVE_FLAGS A3, %2, %3
    xor     eax, eax                    ; return 0 (success)

.return:
    EPILOGUE_4_ARGS_EX 12

.div_zero:
.div_overflow:
    mov     eax, -1                     ; tell the caller to raise a divide error
    jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
    int3
    ret     12                          ; same stack clean-up as the other 20-byte stubs
ENDPROC iemAImpl_ %+ %1 %+ _u64
  %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1524
1525
;
; BSWAP. No flag changes.
;
; Each function takes one argument, pointer to the value to bswap
; (input/output). They all return void.
;

;;
; 16-bit BSWAP: a 32-bit load/store around a bswap that carries an operand
; size prefix, so the host CPU decides what the result looks like.
;
; @param    A0      Pointer to the value to bswap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
    PROLOGUE_1_ARGS
    mov     T0_32, [A0]                 ; just in case any of the upper bits are used.
    db      66h                         ; operand size prefix for the bswap below
    bswap   T0_32
    mov     [A0], T0_32
    EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1540
;;
; 32-bit BSWAP: reverses the byte order of the dword at A0 in place.
;
; @param    A0      Pointer to the value to bswap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
    PROLOGUE_1_ARGS
    mov     T0_32, [A0]
    bswap   T0_32
    mov     [A0], T0_32
    EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1548
;;
; 64-bit BSWAP: reverses the byte order of the qword at A0 in place.
;
; AMD64 hosts use a single 64-bit bswap.  Other hosts byte swap each dword and
; store the two halves in exchanged positions.
;
; @param    A0      Pointer to the value to bswap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
    PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
    mov     T0, [A0]
    bswap   T0
    mov     [A0], T0
%else
    mov     T0, [A0]                    ; low dword
    mov     T1, [A0 + 4]                ; high dword
    bswap   T0
    bswap   T1
    mov     [A0 + 4], T0                ; swapped low dword becomes the new high dword
    mov     [A0], T1                    ; swapped high dword becomes the new low dword
%endif
    EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1567
1568
;;
; Prepares the host FPU for the instruction being emulated by loading parts of
; the guest's control word and status word.
;
; The current environment is stored at [xSP], the FCW and FSW fields of that
; image are patched from the guest FXSTATE, and the image is loaded back.
; Only the condition code bits are taken from the guest FSW; TOP is kept from
; the host image.
;
; @uses     24 bytes of stack.
;           NOTE(review): the fields are addressed through X86FSTENV32P; check
;           this size against that structure.  Callers in this file reserve
;           20h bytes at xSP.
; @uses     T0 and T1 as scratch registers.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
    fnstenv [xSP]

    ; FCW - for exception, precision and rounding control.
    movzx   T0, word [%1 + X86FXSTATE.FCW]
    and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
    mov     [xSP + X86FSTENV32P.FCW], T0_16

    ; FSW - for undefined C0, C1, C2, and C3.
    movzx   T1, word [%1 + X86FXSTATE.FSW]
    and     T1, X86_FSW_C_MASK
    movzx   T0, word [xSP + X86FSTENV32P.FSW]
    and     T0, X86_FSW_TOP_MASK
    or      T0, T1
    mov     [xSP + X86FSTENV32P.FSW], T0_16

    fldenv  [xSP]
%endmacro
1594
1595
;;
; Assembly view of the FPU result record: a ten byte (80-bit) value followed
; by the status word.
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result      resw 5              ; 80-bit result value
    .FSW            resw 1              ; output FPU status word
endstruc
1603
1604
;;
; Assembly view of the FPU result record carrying two 80-bit values, with the
; status word placed between them.
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1     resw 5              ; first 80-bit result value
    .FSW            resw 1              ; output FPU status word
    .r80Result2     resw 5              ; second 80-bit result value
endstruc
1613
1614
1615;
1616;---------------------- 16-bit signed integer operations ----------------------
1617;
1618
1619
;;
; Converts a 16-bit signed integer (memory) to an 80-bit floating point value
; (fpu register) using fild.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
    PROLOGUE_3_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fild    word [A2]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1643
1644
;;
; Stores an 80-bit floating point value (register) as a 16-bit signed integer
; (memory) using fistp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fistp   word [A2]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1668
1669
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; The store uses the word form of fisttp so that exactly two bytes are written
; to the destination.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the environment image

        fninit
        fld     tword [A3]              ; value is loaded before the guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit destination, so the word form is required

        fnstsw  word  [A1]

        fninit                          ; leave the host FPU in its reset state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1694
1695
;;
; Emits iemAImpl_<insn>_r80_by_i16: an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 16-bit signed integer memory operand, returning
; both the result and the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      word [A3]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1732
1733
;;
; Emits iemAImpl_<insn>_r80_by_i16 for an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 16-bit signed integer memory operand, only
; returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      word [A3]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1764
1765
1766
1767;
1768;---------------------- 32-bit signed integer operations ----------------------
1769;
1770
1771
;;
; Converts a 32-bit signed integer (memory) to an 80-bit floating point value
; (fpu register) using fild.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
    PROLOGUE_3_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fild    dword [A2]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1795
1796
;;
; Stores an 80-bit floating point value (register) as a 32-bit signed integer
; (memory) using fistp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fistp   dword [A2]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1820
1821
;;
; Stores an 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation, using fisttp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fisttp  dword [A2]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1846
1847
;;
; Emits iemAImpl_<insn>_r80_by_i32: an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 32-bit signed integer memory operand, returning
; both the result and the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      dword [A3]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1884
1885
;;
; Emits iemAImpl_<insn>_r80_by_i32 for an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 32-bit signed integer memory operand, only
; returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      dword [A3]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
1916
1917
1918
1919;
1920;---------------------- 64-bit signed integer operations ----------------------
1921;
1922
1923
;;
; Converts a 64-bit signed integer (memory) to an 80-bit floating point value
; (fpu register) using fild.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
    PROLOGUE_3_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fild    qword [A2]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
1947
1948
;;
; Stores an 80-bit floating point value (register) as a 64-bit signed integer
; (memory) using fistp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fistp   qword [A2]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
1972
1973
;;
; Stores an 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation, using fisttp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fisttp  qword [A2]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
1998
1999
2000
2001;
2002;---------------------- 32-bit floating point operations ----------------------
2003;
2004
;;
; Converts a 32-bit floating point value (memory) to an 80-bit one (fpu
; register) using fld.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
    PROLOGUE_3_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fld     dword [A2]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2028
2029
;;
; Stores an 80-bit floating point value (register) as a 32-bit one (memory)
; using fst.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fst     dword [A2]                  ; non-popping store; the fninit below resets the FPU

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2053
2054
;;
; Emits iemAImpl_<insn>_r80_by_r32: an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 32-bit floating point memory operand, returning
; both the result and the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      dword [A3]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2091
2092
;;
; Emits iemAImpl_<insn>_r80_by_r32 for an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 32-bit floating point memory operand, only
; returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      dword [A3]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2123
2124
2125
2126;
2127;---------------------- 64-bit floating point operations ----------------------
2128;
2129
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; Like the other load workers in this file, the host FPU is reset with fninit
; before the guest control/status word bits are loaded, so the conversion does
; not depend on whatever FPU state the host happens to have at the call.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch space for the environment image

        fninit                          ; start from a clean FPU, as the sibling workers do
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop exception flags before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its reset state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2152
2153
;;
; Stores an 80-bit floating point value (register) as a 64-bit one (memory)
; using fst.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fst     qword [A2]                  ; non-popping store; the fninit below resets the FPU

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2177
2178
;;
; Emits iemAImpl_<insn>_r80_by_r64: an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 64-bit floating point memory operand, returning
; both the result and the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      qword [A3]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2215
;;
; Emits iemAImpl_<insn>_r80_by_r64 for an FPU instruction working on one 80-bit
; value (loaded as ST0) and one 64-bit floating point memory operand, only
; returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      qword [A3]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2246
2247
2248
2249;
2250;---------------------- 80-bit floating point operations ----------------------
2251;
2252
;;
; Loads an 80-bit floating point value from memory and hands it back together
; with the resulting FSW.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
    PROLOGUE_3_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fld     tword [A2]

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2276
2277
;;
; Stores an 80-bit floating point register value to memory using fstp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; value is loaded before the guest FCW/FSW go in
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    fstp    tword [A2]

    fnstsw  word [A1]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2301
2302
;;
; Emits iemAImpl_<insn>_r80_by_r80: an FPU instruction working on two 80-bit
; floating point values, returning the value left in ST0 and the FSW.
;
; The second value is pushed first so that the first value ends up in ST0 and
; the second in ST1 when the instruction executes.
;
; @param    1       The instruction
; @param    2       The operand list to put after the instruction; the
;                   instantiations below pass either {st0, st1} or {}.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A3]                  ; becomes ST1 after the next load
    fld     tword [A2]                  ; ST0
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1      %2

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2343
2344
;;
; Emits iemAImpl_<insn>_r80_by_r80 for an FPU instruction working on two
; 80-bit floating point values, ST1 and ST0, storing the result in ST1 and
; popping the stack.
;
; The first value is pushed first here, so it ends up in ST1 with the second
; value in ST0.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
    PROLOGUE_4_ARGS
    sub     xSP, 0x20                   ; scratch space for the environment image

    fninit
    fld     tword [A2]                  ; becomes ST1 after the next load
    fld     tword [A3]                  ; ST0
    FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
    %1

    fnstsw  word  [A1 + IEMFPURESULT.FSW]
    fnclex                              ; drop exception flags before the store below
    fstp    tword [A1 + IEMFPURESULT.r80Result]

    fninit                              ; leave the host FPU in its reset state
    add     xSP, 0x20
    EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2379
2380
2381;;
2382; FPU instruction working on two 80-bit floating point values, only
2383; returning FSW.
2384; Each instantiation emits iemAImpl_<instr>_r80_by_r80; no FPU register value is stored, only the status word.
2385; @param 1 The instruction
2386;
2387; @param A0 FPU context (fxsave).
2388; @param A1 Pointer to a uint16_t for the resulting FSW.
2389; @param A2 Pointer to the first 80-bit value; it is loaded into ST0.
2390; @param A3 Pointer to the second 80-bit value; it is loaded into ST1.
2391; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined elsewhere; going by its name it loads FCW and a sanitized FSW from A0 - confirm.
2392%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2393BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2394 PROLOGUE_4_ARGS
2395 sub xSP, 20h ; 32 bytes of stack; not referenced directly below, presumably scratch for the FCW/FSW load macro - confirm.
2396
2397 fninit ; Start from a freshly initialized FPU.
2398 fld tword [A3] ; Pushed first, so it ends up in ST1.
2399 fld tword [A2] ; Pushed last, so it ends up in ST0.
2400 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2401 %1 st0, st1
2402
2403 fnstsw word [A1] ; The status word is the only output.
2404
2405 fninit ; Leave the FPU reinitialized on return.
2406 add xSP, 20h ; Undo the stack adjustment made after the prologue.
2407 EPILOGUE_4_ARGS
2408ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2409%endmacro
2410
2411IEMIMPL_FPU_R80_BY_R80_FSW fcom
2412IEMIMPL_FPU_R80_BY_R80_FSW fucom
2413
2414
2415;;
2416; FPU instruction working on two 80-bit floating point values,
2417; returning FSW and EFLAGS (eax).
2418; Each instantiation emits iemAImpl_<instr>_r80_by_r80.
2419; @param 1 The instruction
2420;
2421; @returns EFLAGS in EAX.
2422; @param A0 FPU context (fxsave).
2423; @param A1 Pointer to a uint16_t for the resulting FSW.
2424; @param A2 Pointer to the first 80-bit value; it is loaded into ST0.
2425; @param A3 Pointer to the second 80-bit value; it is loaded into ST1.
2426; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined elsewhere; going by its name it loads FCW and a sanitized FSW from A0 - confirm.
2427%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2428BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2429 PROLOGUE_4_ARGS
2430 sub xSP, 20h ; 32 bytes of stack; not referenced directly below, presumably scratch for the FCW/FSW load macro - confirm.
2431
2432 fninit ; Start from a freshly initialized FPU.
2433 fld tword [A3] ; Pushed first, so it ends up in ST1.
2434 fld tword [A2] ; Pushed last, so it ends up in ST0.
2435 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2436 %1 st1
2437
2438 fnstsw word [A1] ; Capture the status word.
2439 pushf ; Grab the flags before the add below overwrites them.
2440 pop xAX ; Return value: the flags as they were after the instruction.
2441
2442 fninit ; Leave the FPU reinitialized on return.
2443 add xSP, 20h ; Undo the stack adjustment made after the prologue.
2444 EPILOGUE_4_ARGS
2445ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2446%endmacro
2447
2448IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2449IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2450
2451
2452;;
2453; FPU instruction working on one 80-bit floating point value.
2454; Each instantiation emits iemAImpl_<instr>_r80, which stores the resulting FSW and ST0 in the IEMFPURESULT at A1.
2455; @param 1 The instruction
2456;
2457; @param A0 FPU context (fxsave).
2458; @param A1 Pointer to a IEMFPURESULT for the output.
2459; @param A2 Pointer to the 80-bit value; it is the only value loaded (into ST0).
2460; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined elsewhere; going by its name it loads FCW and a sanitized FSW from A0 - confirm.
2461%macro IEMIMPL_FPU_R80 1
2462BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2463 PROLOGUE_3_ARGS
2464 sub xSP, 20h ; 32 bytes of stack; not referenced directly below, presumably scratch for the FCW/FSW load macro - confirm.
2465
2466 fninit ; Start from a freshly initialized FPU.
2467 fld tword [A2] ; The single input goes into ST0.
2468 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2469 %1
2470
2471 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word right after the instruction.
2472 fnclex ; Presumably so a pending exception cannot make the fstp below trap - confirm.
2473 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store ST0 as the result.
2474
2475 fninit ; Leave the FPU reinitialized on return.
2476 add xSP, 20h ; Undo the stack adjustment made after the prologue.
2477 EPILOGUE_3_ARGS
2478ENDPROC iemAImpl_ %+ %1 %+ _r80
2479%endmacro
2480
2481IEMIMPL_FPU_R80 fchs
2482IEMIMPL_FPU_R80 fabs
2483IEMIMPL_FPU_R80 f2xm1
2484IEMIMPL_FPU_R80 fyl2x ; NOTE(review): fyl2x architecturally consumes ST1 and ST0 and pops, but this template loads one value only - verify this instantiation.
2485IEMIMPL_FPU_R80 fsqrt
2486IEMIMPL_FPU_R80 frndint
2487IEMIMPL_FPU_R80 fsin
2488IEMIMPL_FPU_R80 fcos
2489
2490
2491;;
2492; FPU instruction working on one 80-bit floating point value, only
2493; returning FSW.
2494; Each instantiation emits iemAImpl_<instr>_r80; no FPU register value is stored, only the status word.
2495; @param 1 The instruction
2496;
2497; @param A0 FPU context (fxsave).
2498; @param A1 Pointer to a uint16_t for the resulting FSW.
2499; @param A2 Pointer to the 80-bit value; it is loaded into ST0.
2500; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined elsewhere; going by its name it loads FCW and a sanitized FSW from A0 - confirm.
2501%macro IEMIMPL_FPU_R80_FSW 1
2502BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2503 PROLOGUE_3_ARGS
2504 sub xSP, 20h ; 32 bytes of stack; not referenced directly below, presumably scratch for the FCW/FSW load macro - confirm.
2505
2506 fninit ; Start from a freshly initialized FPU.
2507 fld tword [A2] ; The single input goes into ST0.
2508 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2509 %1
2510
2511 fnstsw word [A1] ; The status word is the only output.
2512
2513 fninit ; Leave the FPU reinitialized on return.
2514 add xSP, 20h ; Undo the stack adjustment made after the prologue.
2515 EPILOGUE_3_ARGS
2516ENDPROC iemAImpl_ %+ %1 %+ _r80
2517%endmacro
2518
2519IEMIMPL_FPU_R80_FSW ftst
2520IEMIMPL_FPU_R80_FSW fxam
2521
2522
2523
2524;;
2525; FPU instruction loading a 80-bit floating point constant.
2526; Each instantiation emits iemAImpl_<instr>, which stores the resulting FSW and the loaded constant in the IEMFPURESULT at A1.
2527; @param 1 The instruction
2528;
2529; @param A0 FPU context (fxsave).
2530; @param A1 Pointer to a IEMFPURESULT for the output.
2531; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined elsewhere; going by its name it loads FCW and a sanitized FSW from A0 - confirm.
2532%macro IEMIMPL_FPU_R80_CONST 1
2533BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2534 PROLOGUE_2_ARGS
2535 sub xSP, 20h ; 32 bytes of stack; not referenced directly below, presumably scratch for the FCW/FSW load macro - confirm.
2536
2537 fninit ; Start from a freshly initialized FPU; no input value is loaded.
2538 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2539 %1
2540
2541 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word right after the instruction.
2542 fnclex ; Presumably so a pending exception cannot make the fstp below trap - confirm.
2543 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store ST0, i.e. the constant just pushed.
2544
2545 fninit ; Leave the FPU reinitialized on return.
2546 add xSP, 20h ; Undo the stack adjustment made after the prologue.
2547 EPILOGUE_2_ARGS
2548ENDPROC iemAImpl_ %+ %1
2549%endmacro
2550
2551IEMIMPL_FPU_R80_CONST fld1
2552IEMIMPL_FPU_R80_CONST fldl2t
2553IEMIMPL_FPU_R80_CONST fldl2e
2554IEMIMPL_FPU_R80_CONST fldpi
2555IEMIMPL_FPU_R80_CONST fldlg2
2556IEMIMPL_FPU_R80_CONST fldln2
2557IEMIMPL_FPU_R80_CONST fldz
2558
2559
2560;;
2561; FPU instruction working on one 80-bit floating point value, outputting two.
2562; Each instantiation emits iemAImpl_<instr>_r80_r80, which stores the FSW and both results in the IEMFPURESULTTWO at A1.
2563; @param 1 The instruction
2564;
2565; @param A0 FPU context (fxsave).
2566; @param A1 Pointer to a IEMFPURESULTTWO for the output.
2567; @param A2 Pointer to the 80-bit value; it is loaded into ST0.
2568; NOTE(review): FPU_LD_FXSTATE_FCW_AND_SAFE_FSW is defined elsewhere; going by its name it loads FCW and a sanitized FSW from A0 - confirm.
2569%macro IEMIMPL_FPU_R80_R80 1
2570BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2571 PROLOGUE_3_ARGS
2572 sub xSP, 20h ; 32 bytes of stack; not referenced directly below, presumably scratch for the FCW/FSW load macro - confirm.
2573
2574 fninit ; Start from a freshly initialized FPU.
2575 fld tword [A2] ; The single input goes into ST0.
2576 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2577 %1
2578
2579 fnstsw word [A1 + IEMFPURESULTTWO.FSW] ; Capture the status word right after the instruction.
2580 fnclex ; Presumably so a pending exception cannot make the fstp below trap - confirm.
2581 fstp tword [A1 + IEMFPURESULTTWO.r80Result2] ; The top of the stack is stored as the second result.
2582 fnclex ; Cleared again ahead of the second store.
2583 fstp tword [A1 + IEMFPURESULTTWO.r80Result1] ; The value that was below it is stored as the first result.
2584
2585 fninit ; Leave the FPU reinitialized on return.
2586 add xSP, 20h ; Undo the stack adjustment made after the prologue.
2587 EPILOGUE_3_ARGS
2588ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2589%endmacro
2590
2591IEMIMPL_FPU_R80_R80 fptan
2592IEMIMPL_FPU_R80_R80 fxtract
2593IEMIMPL_FPU_R80_R80 fsincos
2594
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette