VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 93926

Last change on this file since 93926 was 93906, checked in by vboxsync, 3 years ago

IEM: Implemented fbstp instruction (used by OLE and indirectly MS Word 6.0 and similar).

1; $Id: IEMAllAImpl.asm 93906 2022-02-24 10:28:32Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20; Header Files ;
21;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28; Defined Constants And Macros ;
29;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30
31;;
32; RET XX / RET wrapper for fastcall.
33;
34%macro RET_FASTCALL 1
35%ifdef RT_ARCH_X86
36 %ifdef RT_OS_WINDOWS
37 ret %1
38 %else
39 ret
40 %endif
41%else
42 ret
43%endif
44%endmacro
45
46;;
47; NAME for fastcall functions.
48;
49;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
50; escaping (or whatever the dollar is good for here). Thus the ugly
51; prefix argument.
52;
53%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
54%ifdef RT_ARCH_X86
55 %ifdef RT_OS_WINDOWS
56 %undef NAME_FASTCALL
57 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
58 %endif
59%endif
60
61;;
62; BEGINPROC for fastcall functions.
63;
64; @param 1 The function name (C).
65; @param 2 The argument size on x86.
66;
67%macro BEGINPROC_FASTCALL 2
68 %ifdef ASM_FORMAT_PE
69 export %1=NAME_FASTCALL(%1,%2,$@)
70 %endif
71 %ifdef __NASM__
72 %ifdef ASM_FORMAT_OMF
73 export NAME(%1) NAME_FASTCALL(%1,%2,$@)
74 %endif
75 %endif
76 %ifndef ASM_FORMAT_BIN
77 global NAME_FASTCALL(%1,%2,$@)
78 %endif
79NAME_FASTCALL(%1,%2,@):
80%endmacro
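;
; Editor's illustration (not part of the upstream source): a sketch of what the
; fastcall name decoration amounts to.  On a 32-bit Windows host an invocation
; such as
;       BEGINPROC_FASTCALL iemAImpl_add_u32, 12
; defines and exports a symbol along the lines of
;       global @iemAImpl_add_u32@12
;       @iemAImpl_add_u32@12:
; while on 64-bit hosts and non-Windows 32-bit hosts NAME_FASTCALL falls back
; to the plain NAME() label without the @-prefix and argument-size suffix.
;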
81
82
83;
84 ; We employ some macro assembly here to hide the calling convention differences.
85;
86%ifdef RT_ARCH_AMD64
87 %macro PROLOGUE_1_ARGS 0
88 %endmacro
89 %macro EPILOGUE_1_ARGS 0
90 ret
91 %endmacro
92 %macro EPILOGUE_1_ARGS_EX 0
93 ret
94 %endmacro
95
96 %macro PROLOGUE_2_ARGS 0
97 %endmacro
98 %macro EPILOGUE_2_ARGS 0
99 ret
100 %endmacro
101 %macro EPILOGUE_2_ARGS_EX 1
102 ret
103 %endmacro
104
105 %macro PROLOGUE_3_ARGS 0
106 %endmacro
107 %macro EPILOGUE_3_ARGS 0
108 ret
109 %endmacro
110 %macro EPILOGUE_3_ARGS_EX 1
111 ret
112 %endmacro
113
114 %macro PROLOGUE_4_ARGS 0
115 %endmacro
116 %macro EPILOGUE_4_ARGS 0
117 ret
118 %endmacro
119 %macro EPILOGUE_4_ARGS_EX 1
120 ret
121 %endmacro
122
123 %ifdef ASM_CALL64_GCC
124 %define A0 rdi
125 %define A0_32 edi
126 %define A0_16 di
127 %define A0_8 dil
128
129 %define A1 rsi
130 %define A1_32 esi
131 %define A1_16 si
132 %define A1_8 sil
133
134 %define A2 rdx
135 %define A2_32 edx
136 %define A2_16 dx
137 %define A2_8 dl
138
139 %define A3 rcx
140 %define A3_32 ecx
141 %define A3_16 cx
142 %endif
143
144 %ifdef ASM_CALL64_MSC
145 %define A0 rcx
146 %define A0_32 ecx
147 %define A0_16 cx
148 %define A0_8 cl
149
150 %define A1 rdx
151 %define A1_32 edx
152 %define A1_16 dx
153 %define A1_8 dl
154
155 %define A2 r8
156 %define A2_32 r8d
157 %define A2_16 r8w
158 %define A2_8 r8b
159
160 %define A3 r9
161 %define A3_32 r9d
162 %define A3_16 r9w
163 %endif
164
165 %define T0 rax
166 %define T0_32 eax
167 %define T0_16 ax
168 %define T0_8 al
169
170 %define T1 r11
171 %define T1_32 r11d
172 %define T1_16 r11w
173 %define T1_8 r11b
174
175%else
176 ; x86
177 %macro PROLOGUE_1_ARGS 0
178 push edi
179 %endmacro
180 %macro EPILOGUE_1_ARGS 0
181 pop edi
182 ret 0
183 %endmacro
184 %macro EPILOGUE_1_ARGS_EX 1
185 pop edi
186 ret %1
187 %endmacro
188
189 %macro PROLOGUE_2_ARGS 0
190 push edi
191 %endmacro
192 %macro EPILOGUE_2_ARGS 0
193 pop edi
194 ret 0
195 %endmacro
196 %macro EPILOGUE_2_ARGS_EX 1
197 pop edi
198 ret %1
199 %endmacro
200
201 %macro PROLOGUE_3_ARGS 0
202 push ebx
203 mov ebx, [esp + 4 + 4]
204 push edi
205 %endmacro
206 %macro EPILOGUE_3_ARGS_EX 1
207 %if (%1) < 4
208 %error "With three args, at least 4 bytes must be removed from the stack upon return (32-bit)."
209 %endif
210 pop edi
211 pop ebx
212 ret %1
213 %endmacro
214 %macro EPILOGUE_3_ARGS 0
215 EPILOGUE_3_ARGS_EX 4
216 %endmacro
217
218 %macro PROLOGUE_4_ARGS 0
219 push ebx
220 push edi
221 push esi
222 mov ebx, [esp + 12 + 4 + 0]
223 mov esi, [esp + 12 + 4 + 4]
224 %endmacro
225 %macro EPILOGUE_4_ARGS_EX 1
226 %if (%1) < 8
227 %error "With four args, at least 8 bytes must be removed from the stack upon return (32-bit)."
228 %endif
229 pop esi
230 pop edi
231 pop ebx
232 ret %1
233 %endmacro
234 %macro EPILOGUE_4_ARGS 0
235 EPILOGUE_4_ARGS_EX 8
236 %endmacro
237
238 %define A0 ecx
239 %define A0_32 ecx
240 %define A0_16 cx
241 %define A0_8 cl
242
243 %define A1 edx
244 %define A1_32 edx
245 %define A1_16 dx
246 %define A1_8 dl
247
248 %define A2 ebx
249 %define A2_32 ebx
250 %define A2_16 bx
251 %define A2_8 bl
252
253 %define A3 esi
254 %define A3_32 esi
255 %define A3_16 si
256
257 %define T0 eax
258 %define T0_32 eax
259 %define T0_16 ax
260 %define T0_8 al
261
262 %define T1 edi
263 %define T1_32 edi
264 %define T1_16 di
265%endif
266
267
268;;
269; Load the relevant flags from [%1] if there are undefined flags (%3).
270;
271; @remarks Clobbers T0, stack. Changes EFLAGS.
272; @param A2 The register pointing to the flags.
273; @param 1 The parameter (A0..A3) pointing to the eflags.
274; @param 2 The set of modified flags.
275; @param 3 The set of undefined flags.
276;
277%macro IEM_MAYBE_LOAD_FLAGS 3
278 ;%if (%3) != 0
279 pushf ; store current flags
280 mov T0_32, [%1] ; load the guest flags
281 and dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
282 and T0_32, (%2 | %3) ; select the modified and undefined flags.
283 or [xSP], T0 ; merge guest flags with host flags.
284 popf ; load the mixed flags.
285 ;%endif
286%endmacro
287
288;;
289; Update the flags.
290;
291; @remarks Clobbers T0, T1, stack.
292; @param 1 The register pointing to the EFLAGS.
293; @param 2 The mask of modified flags to save.
294; @param 3 The mask of undefined flags to (maybe) save.
295;
296%macro IEM_SAVE_FLAGS 3
297 %if (%2 | %3) != 0
298 pushf
299 pop T1
300 mov T0_32, [%1] ; flags
301 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags.
302 and T1_32, (%2 | %3) ; select the modified and undefined flags.
303 or T0_32, T1_32 ; combine the flags.
304 mov [%1], T0_32 ; save the flags.
305 %endif
306%endmacro
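;
; Editor's worked sketch of how the two macros above pair up (assuming %1/A2
; points at the guest EFLAGS variable, using the 'add' masks as an example):
;
;       IEM_MAYBE_LOAD_FLAGS A2, <modified>, <undefined>
;           ; host EFLAGS = (host EFLAGS & ~mask) | (guest EFLAGS & mask)
;       add     dword [A0], A1_32
;       IEM_SAVE_FLAGS       A2, <modified>, <undefined>
;           ; guest EFLAGS = (guest EFLAGS & ~mask) | (host EFLAGS & mask)
;
; where mask = <modified> | <undefined>.  Only the bits in the mask travel
; between the guest flags variable and the real CPU flags; all other guest
; flag bits are preserved as-is.
;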
307
308
309;;
310; Macro for implementing a binary operator.
311;
312; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
313; variants, except on 32-bit systems where the 64-bit accesses require hand
314; coding.
315;
316; All the functions take a pointer to the destination memory operand in A0,
317; the source register operand in A1 and a pointer to eflags in A2.
318;
319; @param 1 The instruction mnemonic.
320; @param 2 Non-zero if there should be a locked version.
321; @param 3 The modified flags.
322; @param 4 The undefined flags.
323;
324%macro IEMIMPL_BIN_OP 4
325BEGINCODE
326BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
327 PROLOGUE_3_ARGS
328 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
329 %1 byte [A0], A1_8
330 IEM_SAVE_FLAGS A2, %3, %4
331 EPILOGUE_3_ARGS
332ENDPROC iemAImpl_ %+ %1 %+ _u8
333
334BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
335 PROLOGUE_3_ARGS
336 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
337 %1 word [A0], A1_16
338 IEM_SAVE_FLAGS A2, %3, %4
339 EPILOGUE_3_ARGS
340ENDPROC iemAImpl_ %+ %1 %+ _u16
341
342BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
343 PROLOGUE_3_ARGS
344 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
345 %1 dword [A0], A1_32
346 IEM_SAVE_FLAGS A2, %3, %4
347 EPILOGUE_3_ARGS
348ENDPROC iemAImpl_ %+ %1 %+ _u32
349
350 %ifdef RT_ARCH_AMD64
351BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
352 PROLOGUE_3_ARGS
353 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
354 %1 qword [A0], A1
355 IEM_SAVE_FLAGS A2, %3, %4
356 EPILOGUE_3_ARGS_EX 8
357ENDPROC iemAImpl_ %+ %1 %+ _u64
358 %endif ; RT_ARCH_AMD64
359
360 %if %2 != 0 ; locked versions requested?
361
362BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
363 PROLOGUE_3_ARGS
364 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
365 lock %1 byte [A0], A1_8
366 IEM_SAVE_FLAGS A2, %3, %4
367 EPILOGUE_3_ARGS
368ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
369
370BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
371 PROLOGUE_3_ARGS
372 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
373 lock %1 word [A0], A1_16
374 IEM_SAVE_FLAGS A2, %3, %4
375 EPILOGUE_3_ARGS
376ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
377
378BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
379 PROLOGUE_3_ARGS
380 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
381 lock %1 dword [A0], A1_32
382 IEM_SAVE_FLAGS A2, %3, %4
383 EPILOGUE_3_ARGS
384ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
385
386 %ifdef RT_ARCH_AMD64
387BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
388 PROLOGUE_3_ARGS
389 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
390 lock %1 qword [A0], A1
391 IEM_SAVE_FLAGS A2, %3, %4
392 EPILOGUE_3_ARGS_EX 8
393ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
394 %endif ; RT_ARCH_AMD64
395 %endif ; locked
396%endmacro
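;
; Editor's illustration of what one of the instantiations below generates; this
; is a rough, hand-expanded sketch rather than the exact assembler output:
;
;       BEGINPROC_FASTCALL iemAImpl_add_u32, 12
;               PROLOGUE_3_ARGS
;               IEM_MAYBE_LOAD_FLAGS A2, <status flags>, 0
;               add     dword [A0], A1_32
;               IEM_SAVE_FLAGS       A2, <status flags>, 0
;               EPILOGUE_3_ARGS
;       ENDPROC iemAImpl_add_u32
;
; Per the description above, the C side would call it roughly as
;       void iemAImpl_add_u32(uint32_t *pu32Dst, uint32_t u32Src, uint32_t *pEFlags);
; (assumed prototype, derived from the A0/A1/A2 assignments, not quoted from a
; header).
;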
397
398; instr, lock, modified-flags, undefined-flags.
399IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
400IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
401IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
402IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
403IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
404IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
405IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
406IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
407IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
408
409
410;;
411; Macro for implementing a bit operator.
412;
413; This will generate code for the 16, 32 and 64 bit accesses with locked
414; variants, except on 32-bit systems where the 64-bit accesses require hand
415; coding.
416;
417; All the functions take a pointer to the destination memory operand in A0,
418; the source register operand in A1 and a pointer to eflags in A2.
419;
420; @param 1 The instruction mnemonic.
421; @param 2 Non-zero if there should be a locked version.
422; @param 3 The modified flags.
423; @param 4 The undefined flags.
424;
425%macro IEMIMPL_BIT_OP 4
426BEGINCODE
427BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
428 PROLOGUE_3_ARGS
429 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
430 %1 word [A0], A1_16
431 IEM_SAVE_FLAGS A2, %3, %4
432 EPILOGUE_3_ARGS
433ENDPROC iemAImpl_ %+ %1 %+ _u16
434
435BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
436 PROLOGUE_3_ARGS
437 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
438 %1 dword [A0], A1_32
439 IEM_SAVE_FLAGS A2, %3, %4
440 EPILOGUE_3_ARGS
441ENDPROC iemAImpl_ %+ %1 %+ _u32
442
443 %ifdef RT_ARCH_AMD64
444BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
445 PROLOGUE_3_ARGS
446 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
447 %1 qword [A0], A1
448 IEM_SAVE_FLAGS A2, %3, %4
449 EPILOGUE_3_ARGS_EX 8
450ENDPROC iemAImpl_ %+ %1 %+ _u64
451 %endif ; RT_ARCH_AMD64
452
453 %if %2 != 0 ; locked versions requested?
454
455BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
456 PROLOGUE_3_ARGS
457 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
458 lock %1 word [A0], A1_16
459 IEM_SAVE_FLAGS A2, %3, %4
460 EPILOGUE_3_ARGS
461ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
462
463BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
464 PROLOGUE_3_ARGS
465 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
466 lock %1 dword [A0], A1_32
467 IEM_SAVE_FLAGS A2, %3, %4
468 EPILOGUE_3_ARGS
469ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
470
471 %ifdef RT_ARCH_AMD64
472BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
473 PROLOGUE_3_ARGS
474 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
475 lock %1 qword [A0], A1
476 IEM_SAVE_FLAGS A2, %3, %4
477 EPILOGUE_3_ARGS_EX 8
478ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
479 %endif ; RT_ARCH_AMD64
480 %endif ; locked
481%endmacro
482IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
483IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
484IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
485IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
486
487;;
488; Macro for implementing a bit search operator.
489;
490; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
491; systems where the 64-bit accesses require hand coding.
492;
493; All the functions take a pointer to the destination memory operand in A0,
494; the source register operand in A1 and a pointer to eflags in A2.
495;
496; @param 1 The instruction mnemonic.
497; @param 2 The modified flags.
498; @param 3 The undefined flags.
499;
500%macro IEMIMPL_BIT_OP 3
501BEGINCODE
502BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
503 PROLOGUE_3_ARGS
504 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
505 %1 T0_16, A1_16
506 jz .unchanged_dst
507 mov [A0], T0_16
508.unchanged_dst:
509 IEM_SAVE_FLAGS A2, %2, %3
510 EPILOGUE_3_ARGS
511ENDPROC iemAImpl_ %+ %1 %+ _u16
512
513BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
514 PROLOGUE_3_ARGS
515 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
516 %1 T0_32, A1_32
517 jz .unchanged_dst
518 mov [A0], T0_32
519.unchanged_dst:
520 IEM_SAVE_FLAGS A2, %2, %3
521 EPILOGUE_3_ARGS
522ENDPROC iemAImpl_ %+ %1 %+ _u32
523
524 %ifdef RT_ARCH_AMD64
525BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
526 PROLOGUE_3_ARGS
527 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
528 %1 T0, A1
529 jz .unchanged_dst
530 mov [A0], T0
531.unchanged_dst:
532 IEM_SAVE_FLAGS A2, %2, %3
533 EPILOGUE_3_ARGS_EX 8
534ENDPROC iemAImpl_ %+ %1 %+ _u64
535 %endif ; RT_ARCH_AMD64
536%endmacro
537IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
538IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
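;
; Editor's note on the 'jz .unchanged_dst' in the macro above: the destination
; is only written when the source operand is non-zero.  For example, for bsf:
;       bsf     T0_32, A1_32        ; A1_32 == 0  ->  ZF=1, T0 undefined
;       jz      .unchanged_dst      ; taken, so [A0] keeps its previous value
;       mov     [A0], T0_32         ; only reached for a non-zero source
; so with a zero source only ZF (via IEM_SAVE_FLAGS) is propagated back.
;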
539
540
541;
542; IMUL is a similar yet different case (no lock, no memory destination).
543; The rDX:rAX variant of imul is handled together with mul further down.
544;
545BEGINCODE
546BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
547 PROLOGUE_3_ARGS
548 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
549 imul A1_16, word [A0]
550 mov [A0], A1_16
551 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
552 EPILOGUE_3_ARGS
553ENDPROC iemAImpl_imul_two_u16
554
555BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
556 PROLOGUE_3_ARGS
557 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
558 imul A1_32, dword [A0]
559 mov [A0], A1_32
560 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
561 EPILOGUE_3_ARGS
562ENDPROC iemAImpl_imul_two_u32
563
564%ifdef RT_ARCH_AMD64
565BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
566 PROLOGUE_3_ARGS
567 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
568 imul A1, qword [A0]
569 mov [A0], A1
570 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
571 EPILOGUE_3_ARGS_EX 8
572ENDPROC iemAImpl_imul_two_u64
573%endif ; RT_ARCH_AMD64
574
575
576;
577; XCHG for memory operands. This implies locking. No flag changes.
578;
579; Each function takes two arguments, first the pointer to the memory,
580; then the pointer to the register. They all return void.
581;
582BEGINCODE
583BEGINPROC_FASTCALL iemAImpl_xchg_u8_locked, 8
584 PROLOGUE_2_ARGS
585 mov T0_8, [A1]
586 xchg [A0], T0_8
587 mov [A1], T0_8
588 EPILOGUE_2_ARGS
589ENDPROC iemAImpl_xchg_u8_locked
590
591BEGINPROC_FASTCALL iemAImpl_xchg_u16_locked, 8
592 PROLOGUE_2_ARGS
593 mov T0_16, [A1]
594 xchg [A0], T0_16
595 mov [A1], T0_16
596 EPILOGUE_2_ARGS
597ENDPROC iemAImpl_xchg_u16_locked
598
599BEGINPROC_FASTCALL iemAImpl_xchg_u32_locked, 8
600 PROLOGUE_2_ARGS
601 mov T0_32, [A1]
602 xchg [A0], T0_32
603 mov [A1], T0_32
604 EPILOGUE_2_ARGS
605ENDPROC iemAImpl_xchg_u32_locked
606
607%ifdef RT_ARCH_AMD64
608BEGINPROC_FASTCALL iemAImpl_xchg_u64_locked, 8
609 PROLOGUE_2_ARGS
610 mov T0, [A1]
611 xchg [A0], T0
612 mov [A1], T0
613 EPILOGUE_2_ARGS
614ENDPROC iemAImpl_xchg_u64_locked
615%endif
616
617; Unlocked variants for fDisregardLock mode.
618
619BEGINPROC_FASTCALL iemAImpl_xchg_u8_unlocked, 8
620 PROLOGUE_2_ARGS
621 mov T0_8, [A1]
622 mov T1_8, [A0]
623 mov [A0], T0_8
624 mov [A1], T1_8
625 EPILOGUE_2_ARGS
626ENDPROC iemAImpl_xchg_u8_unlocked
627
628BEGINPROC_FASTCALL iemAImpl_xchg_u16_unlocked, 8
629 PROLOGUE_2_ARGS
630 mov T0_16, [A1]
631 mov T1_16, [A0]
632 mov [A0], T0_16
633 mov [A1], T1_16
634 EPILOGUE_2_ARGS
635ENDPROC iemAImpl_xchg_u16_unlocked
636
637BEGINPROC_FASTCALL iemAImpl_xchg_u32_unlocked, 8
638 PROLOGUE_2_ARGS
639 mov T0_32, [A1]
640 mov T1_32, [A0]
641 mov [A0], T0_32
642 mov [A1], T1_32
643 EPILOGUE_2_ARGS
644ENDPROC iemAImpl_xchg_u32_unlocked
645
646%ifdef RT_ARCH_AMD64
647BEGINPROC_FASTCALL iemAImpl_xchg_u64_unlocked, 8
648 PROLOGUE_2_ARGS
649 mov T0, [A1]
650 mov T1, [A0]
651 mov [A0], T0
652 mov [A1], T1
653 EPILOGUE_2_ARGS
654ENDPROC iemAImpl_xchg_u64_unlocked
655%endif
656
657
658;
659; XADD for memory operands.
660;
661; Each function takes three arguments, first the pointer to the
662; memory/register, then the pointer to the register, and finally a pointer to
663; eflags. They all return void.
664;
665BEGINCODE
666BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
667 PROLOGUE_3_ARGS
668 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
669 mov T0_8, [A1]
670 xadd [A0], T0_8
671 mov [A1], T0_8
672 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
673 EPILOGUE_3_ARGS
674ENDPROC iemAImpl_xadd_u8
675
676BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
677 PROLOGUE_3_ARGS
678 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
679 mov T0_16, [A1]
680 xadd [A0], T0_16
681 mov [A1], T0_16
682 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
683 EPILOGUE_3_ARGS
684ENDPROC iemAImpl_xadd_u16
685
686BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
687 PROLOGUE_3_ARGS
688 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
689 mov T0_32, [A1]
690 xadd [A0], T0_32
691 mov [A1], T0_32
692 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
693 EPILOGUE_3_ARGS
694ENDPROC iemAImpl_xadd_u32
695
696%ifdef RT_ARCH_AMD64
697BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
698 PROLOGUE_3_ARGS
699 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
700 mov T0, [A1]
701 xadd [A0], T0
702 mov [A1], T0
703 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
704 EPILOGUE_3_ARGS
705ENDPROC iemAImpl_xadd_u64
706%endif ; RT_ARCH_AMD64
707
708BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
709 PROLOGUE_3_ARGS
710 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
711 mov T0_8, [A1]
712 lock xadd [A0], T0_8
713 mov [A1], T0_8
714 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
715 EPILOGUE_3_ARGS
716ENDPROC iemAImpl_xadd_u8_locked
717
718BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
719 PROLOGUE_3_ARGS
720 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
721 mov T0_16, [A1]
722 lock xadd [A0], T0_16
723 mov [A1], T0_16
724 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
725 EPILOGUE_3_ARGS
726ENDPROC iemAImpl_xadd_u16_locked
727
728BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
729 PROLOGUE_3_ARGS
730 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
731 mov T0_32, [A1]
732 lock xadd [A0], T0_32
733 mov [A1], T0_32
734 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
735 EPILOGUE_3_ARGS
736ENDPROC iemAImpl_xadd_u32_locked
737
738%ifdef RT_ARCH_AMD64
739BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
740 PROLOGUE_3_ARGS
741 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
742 mov T0, [A1]
743 lock xadd [A0], T0
744 mov [A1], T0
745 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
746 EPILOGUE_3_ARGS
747ENDPROC iemAImpl_xadd_u64_locked
748%endif ; RT_ARCH_AMD64
749
750
751;
752; CMPXCHG8B.
753;
754; These are tricky register wise, so the code is duplicated for each calling
755; convention.
756;
757; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
758;
759; C-proto:
760; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
761; uint32_t *pEFlags));
762;
763; Note! Identical to iemAImpl_cmpxchg16b.
764;
765BEGINCODE
766BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
767%ifdef RT_ARCH_AMD64
768 %ifdef ASM_CALL64_MSC
769 push rbx
770
771 mov r11, rdx ; pu64EaxEdx (is also T1)
772 mov r10, rcx ; pu64Dst
773
774 mov ebx, [r8]
775 mov ecx, [r8 + 4]
776 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
777 mov eax, [r11]
778 mov edx, [r11 + 4]
779
780 lock cmpxchg8b [r10]
781
782 mov [r11], eax
783 mov [r11 + 4], edx
784 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
785
786 pop rbx
787 ret
788 %else
789 push rbx
790
791 mov r10, rcx ; pEFlags
792 mov r11, rdx ; pu64EbxEcx (is also T1)
793
794 mov ebx, [r11]
795 mov ecx, [r11 + 4]
796 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
797 mov eax, [rsi]
798 mov edx, [rsi + 4]
799
800 lock cmpxchg8b [rdi]
801
802 mov [rsi], eax
803 mov [rsi + 4], edx
804 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
805
806 pop rbx
807 ret
808
809 %endif
810%else
811 push esi
812 push edi
813 push ebx
814 push ebp
815
816 mov edi, ecx ; pu64Dst
817 mov esi, edx ; pu64EaxEdx
818 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
819 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
820
821 mov ebx, [ecx]
822 mov ecx, [ecx + 4]
823 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
824 mov eax, [esi]
825 mov edx, [esi + 4]
826
827 lock cmpxchg8b [edi]
828
829 mov [esi], eax
830 mov [esi + 4], edx
831 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
832
833 pop ebp
834 pop ebx
835 pop edi
836 pop esi
837 ret 8
838%endif
839ENDPROC iemAImpl_cmpxchg8b
840
841BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
842 ; Lazy bird always lock prefixes cmpxchg8b.
843 jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
844ENDPROC iemAImpl_cmpxchg8b_locked
845
846%ifdef RT_ARCH_AMD64
847
848;
849; CMPXCHG16B.
850;
851; These are tricky register wise, so the code is duplicated for each calling
852; convention.
853;
854; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
855;
856; C-proto:
857; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
858; uint32_t *pEFlags));
859;
860; Note! Identical to iemAImpl_cmpxchg8b.
861;
862BEGINCODE
863BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
864 %ifdef ASM_CALL64_MSC
865 push rbx
866
867 mov r11, rdx ; pu64RaxRdx (is also T1)
868 mov r10, rcx ; pu64Dst
869
870 mov rbx, [r8]
871 mov rcx, [r8 + 8]
872 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
873 mov rax, [r11]
874 mov rdx, [r11 + 8]
875
876 lock cmpxchg16b [r10]
877
878 mov [r11], rax
879 mov [r11 + 8], rdx
880 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
881
882 pop rbx
883 ret
884 %else
885 push rbx
886
887 mov r10, rcx ; pEFlags
888 mov r11, rdx ; pu64RbxRcx (is also T1)
889
890 mov rbx, [r11]
891 mov rcx, [r11 + 8]
892 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
893 mov rax, [rsi]
894 mov rdx, [rsi + 8]
895
896 lock cmpxchg16b [rdi]
897
898 mov [rsi], rax
899 mov [rsi + 8], rdx
900 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
901
902 pop rbx
903 ret
904
905 %endif
906ENDPROC iemAImpl_cmpxchg16b
907
908BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
909 ; Lazy bird always lock prefixes cmpxchg16b.
910 jmp NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
911ENDPROC iemAImpl_cmpxchg16b_locked
912
913%endif ; RT_ARCH_AMD64
914
915
916;
917; CMPXCHG.
918;
919; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
920;
921; C-proto:
922; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
923;
924BEGINCODE
925%macro IEMIMPL_CMPXCHG 2
926BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
927 PROLOGUE_4_ARGS
928 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
929 mov al, [A1]
930 %1 cmpxchg [A0], A2_8
931 mov [A1], al
932 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
933 EPILOGUE_4_ARGS
934ENDPROC iemAImpl_cmpxchg_u8 %+ %2
935
936BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
937 PROLOGUE_4_ARGS
938 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
939 mov ax, [A1]
940 %1 cmpxchg [A0], A2_16
941 mov [A1], ax
942 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
943 EPILOGUE_4_ARGS
944ENDPROC iemAImpl_cmpxchg_u16 %+ %2
945
946BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
947 PROLOGUE_4_ARGS
948 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
949 mov eax, [A1]
950 %1 cmpxchg [A0], A2_32
951 mov [A1], eax
952 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
953 EPILOGUE_4_ARGS
954ENDPROC iemAImpl_cmpxchg_u32 %+ %2
955
956BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
957%ifdef RT_ARCH_AMD64
958 PROLOGUE_4_ARGS
959 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
960 mov rax, [A1]
961 %1 cmpxchg [A0], A2
962 mov [A1], rax
963 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
964 EPILOGUE_4_ARGS
965%else
966 ;
967 ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
968 ;
969 push esi
970 push edi
971 push ebx
972 push ebp
973
974 mov edi, ecx ; pu64Dst
975 mov esi, edx ; pu64Rax
976 mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
977 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
978
979 mov ebx, [ecx]
980 mov ecx, [ecx + 4]
981 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
982 mov eax, [esi]
983 mov edx, [esi + 4]
984
985 lock cmpxchg8b [edi]
986
987 ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
988 jz .cmpxchg8b_not_equal
989 cmp eax, eax ; just set the other flags.
990.store:
991 mov [esi], eax
992 mov [esi + 4], edx
993 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
994
995 pop ebp
996 pop ebx
997 pop edi
998 pop esi
999 ret 8
1000
1001.cmpxchg8b_not_equal:
1002 cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
1003 jne .store
1004 cmp [esi], eax
1005 jmp .store
1006
1007%endif
1008ENDPROC iemAImpl_cmpxchg_u64 %+ %2
1009%endmacro ; IEMIMPL_CMPXCHG
1010
1011IEMIMPL_CMPXCHG , ,
1012IEMIMPL_CMPXCHG lock, _locked
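;
; Editor's sketch of the contract the macro above implements, using the 32-bit
; unlocked variant as an example:
;       mov     eax, [A1]           ; eax = expected value (*puEax)
;       cmpxchg [A0], A2_32         ; if [A0] == eax: [A0] = uReg, ZF=1
;                                   ; else:           eax = [A0],  ZF=0
;       mov     [A1], eax           ; hand the old/actual destination value back
; so after the call *puEax always holds the value the destination had when the
; compare was performed, and ZF in *pEFlags tells whether the exchange happened.
;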
1013
1014;;
1015; Macro for implementing a unary operator.
1016;
1017; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
1018; variants, except on 32-bit systems where the 64-bit accesses require hand
1019; coding.
1020;
1021; All the functions take a pointer to the destination memory operand in A0
1022; and a pointer to eflags in A1.
1023;
1024; @param 1 The instruction mnemonic.
1025; @param 2 The modified flags.
1026; @param 3 The undefined flags.
1027;
1028%macro IEMIMPL_UNARY_OP 3
1029BEGINCODE
1030BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
1031 PROLOGUE_2_ARGS
1032 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1033 %1 byte [A0]
1034 IEM_SAVE_FLAGS A1, %2, %3
1035 EPILOGUE_2_ARGS
1036ENDPROC iemAImpl_ %+ %1 %+ _u8
1037
1038BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
1039 PROLOGUE_2_ARGS
1040 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1041 lock %1 byte [A0]
1042 IEM_SAVE_FLAGS A1, %2, %3
1043 EPILOGUE_2_ARGS
1044ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
1045
1046BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
1047 PROLOGUE_2_ARGS
1048 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1049 %1 word [A0]
1050 IEM_SAVE_FLAGS A1, %2, %3
1051 EPILOGUE_2_ARGS
1052ENDPROC iemAImpl_ %+ %1 %+ _u16
1053
1054BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
1055 PROLOGUE_2_ARGS
1056 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1057 lock %1 word [A0]
1058 IEM_SAVE_FLAGS A1, %2, %3
1059 EPILOGUE_2_ARGS
1060ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
1061
1062BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
1063 PROLOGUE_2_ARGS
1064 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1065 %1 dword [A0]
1066 IEM_SAVE_FLAGS A1, %2, %3
1067 EPILOGUE_2_ARGS
1068ENDPROC iemAImpl_ %+ %1 %+ _u32
1069
1070BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
1071 PROLOGUE_2_ARGS
1072 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1073 lock %1 dword [A0]
1074 IEM_SAVE_FLAGS A1, %2, %3
1075 EPILOGUE_2_ARGS
1076ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1077
1078 %ifdef RT_ARCH_AMD64
1079BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1080 PROLOGUE_2_ARGS
1081 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1082 %1 qword [A0]
1083 IEM_SAVE_FLAGS A1, %2, %3
1084 EPILOGUE_2_ARGS
1085ENDPROC iemAImpl_ %+ %1 %+ _u64
1086
1087BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1088 PROLOGUE_2_ARGS
1089 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1090 lock %1 qword [A0]
1091 IEM_SAVE_FLAGS A1, %2, %3
1092 EPILOGUE_2_ARGS
1093ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1094 %endif ; RT_ARCH_AMD64
1095
1096%endmacro
1097
1098IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1099IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1100IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1101IEMIMPL_UNARY_OP not, 0, 0
1102
1103
1104;
1105; BSWAP. No flag changes.
1106;
1107; Each function takes one argument, a pointer to the value to bswap
1108; (input/output). They all return void.
1109;
1110BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
1111 PROLOGUE_1_ARGS
1112 mov T0_32, [A0] ; just in case any of the upper bits are used.
1113 db 66h
1114 bswap T0_32
1115 mov [A0], T0_32
1116 EPILOGUE_1_ARGS
1117ENDPROC iemAImpl_bswap_u16
1118
1119BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
1120 PROLOGUE_1_ARGS
1121 mov T0_32, [A0]
1122 bswap T0_32
1123 mov [A0], T0_32
1124 EPILOGUE_1_ARGS
1125ENDPROC iemAImpl_bswap_u32
1126
1127BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
1128%ifdef RT_ARCH_AMD64
1129 PROLOGUE_1_ARGS
1130 mov T0, [A0]
1131 bswap T0
1132 mov [A0], T0
1133 EPILOGUE_1_ARGS
1134%else
1135 PROLOGUE_1_ARGS
1136 mov T0, [A0]
1137 mov T1, [A0 + 4]
1138 bswap T0
1139 bswap T1
1140 mov [A0 + 4], T0
1141 mov [A0], T1
1142 EPILOGUE_1_ARGS
1143%endif
1144ENDPROC iemAImpl_bswap_u64
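;
; Editor's worked example for the 32-bit host path above: with the 64-bit value
; 0123456789ABCDEFh at [A0] (little endian, so the low dword is 89ABCDEFh):
;       T0 = 89ABCDEFh  -> bswap -> EFCDAB89h  -> stored at [A0 + 4]
;       T1 = 01234567h  -> bswap -> 67452301h  -> stored at [A0]
; yielding EFCDAB8967452301h, the same result a native 64-bit bswap produces.
;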
1145
1146
1147;;
1148; Macro for implementing a shift operation.
1149;
1150; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1151; 32-bit systems where the 64-bit accesses require hand coding.
1152;
1153; All the functions take a pointer to the destination memory operand in A0,
1154; the shift count in A1 and a pointer to eflags in A2.
1155;
1156; @param 1 The instruction mnemonic.
1157; @param 2 The modified flags.
1158; @param 3 The undefined flags.
1159;
1160; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1161;
1162%macro IEMIMPL_SHIFT_OP 3
1163BEGINCODE
1164BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1165 PROLOGUE_3_ARGS
1166 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1167 %ifdef ASM_CALL64_GCC
1168 mov cl, A1_8
1169 %1 byte [A0], cl
1170 %else
1171 xchg A1, A0
1172 %1 byte [A1], cl
1173 %endif
1174 IEM_SAVE_FLAGS A2, %2, %3
1175 EPILOGUE_3_ARGS
1176ENDPROC iemAImpl_ %+ %1 %+ _u8
1177
1178BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1179 PROLOGUE_3_ARGS
1180 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1181 %ifdef ASM_CALL64_GCC
1182 mov cl, A1_8
1183 %1 word [A0], cl
1184 %else
1185 xchg A1, A0
1186 %1 word [A1], cl
1187 %endif
1188 IEM_SAVE_FLAGS A2, %2, %3
1189 EPILOGUE_3_ARGS
1190ENDPROC iemAImpl_ %+ %1 %+ _u16
1191
1192BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1193 PROLOGUE_3_ARGS
1194 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1195 %ifdef ASM_CALL64_GCC
1196 mov cl, A1_8
1197 %1 dword [A0], cl
1198 %else
1199 xchg A1, A0
1200 %1 dword [A1], cl
1201 %endif
1202 IEM_SAVE_FLAGS A2, %2, %3
1203 EPILOGUE_3_ARGS
1204ENDPROC iemAImpl_ %+ %1 %+ _u32
1205
1206 %ifdef RT_ARCH_AMD64
1207BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1208 PROLOGUE_3_ARGS
1209 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1210 %ifdef ASM_CALL64_GCC
1211 mov cl, A1_8
1212 %1 qword [A0], cl
1213 %else
1214 xchg A1, A0
1215 %1 qword [A1], cl
1216 %endif
1217 IEM_SAVE_FLAGS A2, %2, %3
1218 EPILOGUE_3_ARGS
1219ENDPROC iemAImpl_ %+ %1 %+ _u64
1220 %endif ; RT_ARCH_AMD64
1221
1222%endmacro
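;
; Editor's note on the register shuffling in the macro above, taking the
; Windows/MSC 64-bit convention as an example (A0 = rcx = pointer,
; A1 = rdx = shift count):
;       xchg    A1, A0              ; rcx <-> rdx: the count lands in cl and
;                                   ; the destination pointer in A1 (rdx)
;       shl     dword [A1], cl
; Under the SysV/GCC convention rcx is not used by the first three arguments,
; so the count is simply copied into cl and the pointer stays in A0.
;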
1223
1224IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1225IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1226IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1227IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1228IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1229IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1230IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1231
1232
1233;;
1234; Macro for implementing a double precision shift operation.
1235;
1236; This will generate code for the 16, 32 and 64 bit accesses, except on
1237; 32-bit systems where the 64-bit accesses require hand coding.
1238;
1239; The functions take the destination operand (r/m) in A0, the source (reg) in
1240; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1241;
1242; @param 1 The instruction mnemonic.
1243; @param 2 The modified flags.
1244; @param 3 The undefined flags.
1245;
1246; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1247;
1248%macro IEMIMPL_SHIFT_DBL_OP 3
1249BEGINCODE
1250BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1251 PROLOGUE_4_ARGS
1252 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1253 %ifdef ASM_CALL64_GCC
1254 xchg A3, A2
1255 %1 [A0], A1_16, cl
1256 xchg A3, A2
1257 %else
1258 xchg A0, A2
1259 %1 [A2], A1_16, cl
1260 %endif
1261 IEM_SAVE_FLAGS A3, %2, %3
1262 EPILOGUE_4_ARGS
1263ENDPROC iemAImpl_ %+ %1 %+ _u16
1264
1265BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1266 PROLOGUE_4_ARGS
1267 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1268 %ifdef ASM_CALL64_GCC
1269 xchg A3, A2
1270 %1 [A0], A1_32, cl
1271 xchg A3, A2
1272 %else
1273 xchg A0, A2
1274 %1 [A2], A1_32, cl
1275 %endif
1276 IEM_SAVE_FLAGS A3, %2, %3
1277 EPILOGUE_4_ARGS
1278ENDPROC iemAImpl_ %+ %1 %+ _u32
1279
1280 %ifdef RT_ARCH_AMD64
1281BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1282 PROLOGUE_4_ARGS
1283 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1284 %ifdef ASM_CALL64_GCC
1285 xchg A3, A2
1286 %1 [A0], A1, cl
1287 xchg A3, A2
1288 %else
1289 xchg A0, A2
1290 %1 [A2], A1, cl
1291 %endif
1292 IEM_SAVE_FLAGS A3, %2, %3
1293 EPILOGUE_4_ARGS_EX 12
1294ENDPROC iemAImpl_ %+ %1 %+ _u64
1295 %endif ; RT_ARCH_AMD64
1296
1297%endmacro
1298
1299IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1300IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1301
1302
1303;;
1304; Macro for implementing multiplication operations.
1305;
1306; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1307; 32-bit systems where the 64-bit accesses require hand coding.
1308;
1309; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1310; functions take a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1311; pointer to eflags in A3.
1312;
1313; The functions all return 0 so that the same caller can be used for div/idiv
1314; as well as for the mul/imul implementation.
1315;
1316; @param 1 The instruction mnemonic.
1317; @param 2 The modified flags.
1318; @param 3 The undefined flags.
1319;
1320; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1321;
1322%macro IEMIMPL_MUL_OP 3
1323BEGINCODE
1324BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1325 PROLOGUE_3_ARGS
1326 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1327 mov al, [A0]
1328 %1 A1_8
1329 mov [A0], ax
1330 IEM_SAVE_FLAGS A2, %2, %3
1331 xor eax, eax
1332 EPILOGUE_3_ARGS
1333ENDPROC iemAImpl_ %+ %1 %+ _u8
1334
1335BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1336 PROLOGUE_4_ARGS
1337 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1338 mov ax, [A0]
1339 %ifdef ASM_CALL64_GCC
1340 %1 A2_16
1341 mov [A0], ax
1342 mov [A1], dx
1343 %else
1344 mov T1, A1
1345 %1 A2_16
1346 mov [A0], ax
1347 mov [T1], dx
1348 %endif
1349 IEM_SAVE_FLAGS A3, %2, %3
1350 xor eax, eax
1351 EPILOGUE_4_ARGS
1352ENDPROC iemAImpl_ %+ %1 %+ _u16
1353
1354BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1355 PROLOGUE_4_ARGS
1356 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1357 mov eax, [A0]
1358 %ifdef ASM_CALL64_GCC
1359 %1 A2_32
1360 mov [A0], eax
1361 mov [A1], edx
1362 %else
1363 mov T1, A1
1364 %1 A2_32
1365 mov [A0], eax
1366 mov [T1], edx
1367 %endif
1368 IEM_SAVE_FLAGS A3, %2, %3
1369 xor eax, eax
1370 EPILOGUE_4_ARGS
1371ENDPROC iemAImpl_ %+ %1 %+ _u32
1372
1373 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1374BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1375 PROLOGUE_4_ARGS
1376 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1377 mov rax, [A0]
1378 %ifdef ASM_CALL64_GCC
1379 %1 A2
1380 mov [A0], rax
1381 mov [A1], rdx
1382 %else
1383 mov T1, A1
1384 %1 A2
1385 mov [A0], rax
1386 mov [T1], rdx
1387 %endif
1388 IEM_SAVE_FLAGS A3, %2, %3
1389 xor eax, eax
1390 EPILOGUE_4_ARGS_EX 12
1391ENDPROC iemAImpl_ %+ %1 %+ _u64
1392 %endif ; RT_ARCH_AMD64
1393
1394%endmacro
1395
1396IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1397IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1398
1399
1400BEGINCODE
1401;;
1402; Worker function for negating the 64-bit value held in T1:T0 (two 32-bit registers)
1403; @uses None (T0,T1)
1404BEGINPROC iemAImpl_negate_T0_T1_u32
1405 push 0
1406 push 0
1407 xchg T0_32, [xSP]
1408 xchg T1_32, [xSP + xCB]
1409 sub T0_32, [xSP]
1410 sbb T1_32, [xSP + xCB]
1411 add xSP, xCB*2
1412 ret
1413ENDPROC iemAImpl_negate_T0_T1_u32
1414
1415%ifdef RT_ARCH_AMD64
1416;;
1417; Worker function for negating the 128-bit value held in T1:T0 (two 64-bit registers)
1418; @uses None (T0,T1)
1419BEGINPROC iemAImpl_negate_T0_T1_u64
1420 push 0
1421 push 0
1422 xchg T0, [xSP]
1423 xchg T1, [xSP + xCB]
1424 sub T0, [xSP]
1425 sbb T1, [xSP + xCB]
1426 add xSP, xCB*2
1427 ret
1428ENDPROC iemAImpl_negate_T0_T1_u64
1429%endif
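;
; Editor's worked illustration of the trick used by the helpers above: the
; value in T1:T0 is negated as 0 - (T1:T0) without any extra scratch register
; by parking the original value on the stack:
;       push 0 / push 0             ; two zero slots
;       xchg T0, [xSP]              ; T0 = 0, stack low  = old T0
;       xchg T1, [xSP + xCB]        ; T1 = 0, stack high = old T1
;       sub  T0, [xSP]              ; T0 = 0 - old T0 (sets borrow)
;       sbb  T1, [xSP + xCB]        ; T1 = 0 - old T1 - borrow
; e.g. T1:T0 = 00000000h:00000001h (+1) becomes FFFFFFFFh:FFFFFFFFh (-1).
;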
1430
1431
1432;;
1433; Macro for implementing division operations.
1434;
1435; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1436; 32-bit systems where the 64-bit accesses require hand coding.
1437;
1438; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1439; functions take a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1440; pointer to eflags in A3.
1441;
1442; The functions all return 0 on success and -1 if a divide error should be
1443; raised by the caller.
1444;
1445; @param 1 The instruction mnemonic.
1446; @param 2 The modified flags.
1447; @param 3 The undefined flags.
1448; @param 4 1 if signed, 0 if unsigned.
1449;
1450; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1451;
1452%macro IEMIMPL_DIV_OP 4
1453BEGINCODE
1454BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1455 PROLOGUE_3_ARGS
1456
1457 ; div by chainsaw check.
1458 test A1_8, A1_8
1459 jz .div_zero
1460
1461 ; Overflow check - unsigned division is simple to verify, but we haven't
1462 ; found a simple way to check signed division yet, unfortunately.
1463 %if %4 == 0
1464 cmp [A0 + 1], A1_8
1465 jae .div_overflow
1466 %else
1467 mov T0_16, [A0] ; T0 = dividend
1468 mov T1, A1 ; T1 = saved divisor (because of missing T1_8 in 32-bit)
1469 test A1_8, A1_8
1470 js .divisor_negative
1471 test T0_16, T0_16
1472 jns .both_positive
1473 neg T0_16
1474.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1475 push T0 ; Start off like unsigned below.
1476 shr T0_16, 7
1477 cmp T0_8, A1_8
1478 pop T0
1479 jb .div_no_overflow
1480 ja .div_overflow
1481 and T0_8, 0x7f ; Special case for covering (divisor - 1).
1482 cmp T0_8, A1_8
1483 jae .div_overflow
1484 jmp .div_no_overflow
1485
1486.divisor_negative:
1487 neg A1_8
1488 test T0_16, T0_16
1489 jns .one_of_each
1490 neg T0_16
1491.both_positive: ; Same as unsigned shifted by sign indicator bit.
1492 shr T0_16, 7
1493 cmp T0_8, A1_8
1494 jae .div_overflow
1495.div_no_overflow:
1496 mov A1, T1 ; restore divisor
1497 %endif
1498
1499 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1500 mov ax, [A0]
1501 %1 A1_8
1502 mov [A0], ax
1503 IEM_SAVE_FLAGS A2, %2, %3
1504 xor eax, eax
1505
1506.return:
1507 EPILOGUE_3_ARGS
1508
1509.div_zero:
1510.div_overflow:
1511 mov eax, -1
1512 jmp .return
1513ENDPROC iemAImpl_ %+ %1 %+ _u8
1514
1515BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1516 PROLOGUE_4_ARGS
1517
1518 ; div by chainsaw check.
1519 test A2_16, A2_16
1520 jz .div_zero
1521
1522 ; Overflow check - unsigned division is simple to verify, but we haven't
1523 ; found a simple way to check signed division yet, unfortunately.
1524 %if %4 == 0
1525 cmp [A1], A2_16
1526 jae .div_overflow
1527 %else
1528 mov T0_16, [A1]
1529 shl T0_32, 16
1530 mov T0_16, [A0] ; T0 = dividend
1531 mov T1, A2 ; T1 = divisor
1532 test T1_16, T1_16
1533 js .divisor_negative
1534 test T0_32, T0_32
1535 jns .both_positive
1536 neg T0_32
1537.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1538 push T0 ; Start off like unsigned below.
1539 shr T0_32, 15
1540 cmp T0_16, T1_16
1541 pop T0
1542 jb .div_no_overflow
1543 ja .div_overflow
1544 and T0_16, 0x7fff ; Special case for covering (divisor - 1).
1545 cmp T0_16, T1_16
1546 jae .div_overflow
1547 jmp .div_no_overflow
1548
1549.divisor_negative:
1550 neg T1_16
1551 test T0_32, T0_32
1552 jns .one_of_each
1553 neg T0_32
1554.both_positive: ; Same as unsigned shifted by sign indicator bit.
1555 shr T0_32, 15
1556 cmp T0_16, T1_16
1557 jae .div_overflow
1558.div_no_overflow:
1559 %endif
1560
1561 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1562 %ifdef ASM_CALL64_GCC
1563 mov T1, A2
1564 mov ax, [A0]
1565 mov dx, [A1]
1566 %1 T1_16
1567 mov [A0], ax
1568 mov [A1], dx
1569 %else
1570 mov T1, A1
1571 mov ax, [A0]
1572 mov dx, [T1]
1573 %1 A2_16
1574 mov [A0], ax
1575 mov [T1], dx
1576 %endif
1577 IEM_SAVE_FLAGS A3, %2, %3
1578 xor eax, eax
1579
1580.return:
1581 EPILOGUE_4_ARGS
1582
1583.div_zero:
1584.div_overflow:
1585 mov eax, -1
1586 jmp .return
1587ENDPROC iemAImpl_ %+ %1 %+ _u16
1588
1589BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1590 PROLOGUE_4_ARGS
1591
1592 ; div by chainsaw check.
1593 test A2_32, A2_32
1594 jz .div_zero
1595
1596 ; Overflow check - unsigned division is simple to verify, but we haven't
1597 ; found a simple way to check signed division yet, unfortunately.
1598 %if %4 == 0
1599 cmp [A1], A2_32
1600 jae .div_overflow
1601 %else
1602 push A2 ; save A2 so we can modify it (we're out of regs on x86).
1603 mov T0_32, [A0] ; T0 = dividend low
1604 mov T1_32, [A1] ; T1 = dividend high
1605 test A2_32, A2_32
1606 js .divisor_negative
1607 test T1_32, T1_32
1608 jns .both_positive
1609 call NAME(iemAImpl_negate_T0_T1_u32)
1610.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1611 push T0 ; Start off like unsigned below.
1612 shl T1_32, 1
1613 shr T0_32, 31
1614 or T1_32, T0_32
1615 cmp T1_32, A2_32
1616 pop T0
1617 jb .div_no_overflow
1618 ja .div_overflow
1619 and T0_32, 0x7fffffff ; Special case for covering (divisor - 1).
1620 cmp T0_32, A2_32
1621 jae .div_overflow
1622 jmp .div_no_overflow
1623
1624.divisor_negative:
1625 neg A2_32
1626 test T1_32, T1_32
1627 jns .one_of_each
1628 call NAME(iemAImpl_negate_T0_T1_u32)
1629.both_positive: ; Same as unsigned shifted by sign indicator bit.
1630 shl T1_32, 1
1631 shr T0_32, 31
1632 or T1_32, T0_32
1633 cmp T1_32, A2_32
1634 jae .div_overflow
1635.div_no_overflow:
1636 pop A2
1637 %endif
1638
1639 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1640 mov eax, [A0]
1641 %ifdef ASM_CALL64_GCC
1642 mov T1, A2
1643 mov eax, [A0]
1644 mov edx, [A1]
1645 %1 T1_32
1646 mov [A0], eax
1647 mov [A1], edx
1648 %else
1649 mov T1, A1
1650 mov eax, [A0]
1651 mov edx, [T1]
1652 %1 A2_32
1653 mov [A0], eax
1654 mov [T1], edx
1655 %endif
1656 IEM_SAVE_FLAGS A3, %2, %3
1657 xor eax, eax
1658
1659.return:
1660 EPILOGUE_4_ARGS
1661
1662.div_overflow:
1663 %if %4 != 0
1664 pop A2
1665 %endif
1666.div_zero:
1667 mov eax, -1
1668 jmp .return
1669ENDPROC iemAImpl_ %+ %1 %+ _u32
1670
1671 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1672BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1673 PROLOGUE_4_ARGS
1674
1675 test A2, A2
1676 jz .div_zero
1677 %if %4 == 0
1678 cmp [A1], A2
1679 jae .div_overflow
1680 %else
1681 push A2 ; save A2 so we can modify it (we're out of regs on x86).
1682 mov T0, [A0] ; T0 = dividend low
1683 mov T1, [A1] ; T1 = dividend high
1684 test A2, A2
1685 js .divisor_negative
1686 test T1, T1
1687 jns .both_positive
1688 call NAME(iemAImpl_negate_T0_T1_u64)
1689.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1690 push T0 ; Start off like unsigned below.
1691 shl T1, 1
1692 shr T0, 63
1693 or T1, T0
1694 cmp T1, A2
1695 pop T0
1696 jb .div_no_overflow
1697 ja .div_overflow
1698 mov T1, 0x7fffffffffffffff
1699 and T0, T1 ; Special case for covering (divisor - 1).
1700 cmp T0, A2
1701 jae .div_overflow
1702 jmp .div_no_overflow
1703
1704.divisor_negative:
1705 neg A2
1706 test T1, T1
1707 jns .one_of_each
1708 call NAME(iemAImpl_negate_T0_T1_u64)
1709.both_positive: ; Same as unsigned shifted by sign indicator bit.
1710 shl T1, 1
1711 shr T0, 63
1712 or T1, T0
1713 cmp T1, A2
1714 jae .div_overflow
1715.div_no_overflow:
1716 pop A2
1717 %endif
1718
1719 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1720 mov rax, [A0]
1721 %ifdef ASM_CALL64_GCC
1722 mov T1, A2
1723 mov rax, [A0]
1724 mov rdx, [A1]
1725 %1 T1
1726 mov [A0], rax
1727 mov [A1], rdx
1728 %else
1729 mov T1, A1
1730 mov rax, [A0]
1731 mov rdx, [T1]
1732 %1 A2
1733 mov [A0], rax
1734 mov [T1], rdx
1735 %endif
1736 IEM_SAVE_FLAGS A3, %2, %3
1737 xor eax, eax
1738
1739.return:
1740 EPILOGUE_4_ARGS_EX 12
1741
1742.div_overflow:
1743 %if %4 != 0
1744 pop A2
1745 %endif
1746.div_zero:
1747 mov eax, -1
1748 jmp .return
1749ENDPROC iemAImpl_ %+ %1 %+ _u64
1750 %endif ; RT_ARCH_AMD64
1751
1752%endmacro
1753
1754IEMIMPL_DIV_OP div, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1755IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
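;
; Editor's worked example of the signed overflow pre-check generated above,
; using iemAImpl_idiv_u16 with dividend DX:AX = 00012345h (+74565) and
; divisor +2 (both positive):
;       shr     T0_32, 15           ; 74565 >> 15 = 2
;       cmp     T0_16, T1_16        ; 2 >= 2  ->  jae .div_overflow
; and indeed 74565 / 2 = 37282 does not fit the signed 16-bit quotient range
; (max 32767), so the function returns -1 and the caller raises #DE instead of
; letting the host idiv fault.  With dividend 0000FFFEh (65534) the shifted
; value is 1, 1 < 2, and the division proceeds (quotient 32767, remainder 0).
;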
1756
1757
1758;;
1759; Macro for implementing memory fence operation.
1760;
1761; No return value, no operands or anything.
1762;
1763; @param 1 The instruction.
1764;
1765%macro IEMIMPL_MEM_FENCE 1
1766BEGINCODE
1767BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
1768 %1
1769 ret
1770ENDPROC iemAImpl_ %+ %1
1771%endmacro
1772
1773IEMIMPL_MEM_FENCE lfence
1774IEMIMPL_MEM_FENCE sfence
1775IEMIMPL_MEM_FENCE mfence
1776
1777;;
1778; Alternative for non-SSE2 host.
1779;
1780BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
1781 push xAX
1782 xchg xAX, [xSP]
1783 add xSP, xCB
1784 ret
1785ENDPROC iemAImpl_alt_mem_fence
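;
; Editor's note: the xchg with a memory operand above is implicitly locked, so
; the push/xchg/add sequence acts as a full memory barrier on CPUs that lack
; the SSE2 fence instructions, while leaving xAX and the stack unchanged.
;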
1786
1787
1788;;
1789; Initialize the FPU for the actual instruction being emulated; this means
1790; loading parts of the guest's control word and status word.
1791;
1792; @uses 24 bytes of stack.
1793; @param 1 Expression giving the address of the FXSTATE of the guest.
1794;
1795%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
1796 fnstenv [xSP]
1797
1798 ; FCW - for exception, precision and rounding control.
1799 movzx T0, word [%1 + X86FXSTATE.FCW]
1800 and T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
1801 mov [xSP + X86FSTENV32P.FCW], T0_16
1802
1803 ; FSW - for undefined C0, C1, C2, and C3.
1804 movzx T1, word [%1 + X86FXSTATE.FSW]
1805 and T1, X86_FSW_C_MASK
1806 movzx T0, word [xSP + X86FSTENV32P.FSW]
1807 and T0, X86_FSW_TOP_MASK
1808 or T0, T1
1809 mov [xSP + X86FSTENV32P.FSW], T0_16
1810
1811 fldenv [xSP]
1812%endmacro
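;
; Editor's note on the macro above: only the exception mask, precision control
; and rounding control bits of the guest FCW are copied into the freshly
; stored environment, and only the guest's C0-C3 condition bits are merged
; into an FSW whose TOP field still comes from the host (which the callers
; have just reset with fninit), so the subsequent fldenv cannot pull in a
; bogus stack top or pending unmasked exceptions from the guest image.
;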
1813
1814
1815;;
1816; Need to move this as well somewhere better?
1817;
1818struc IEMFPURESULT
1819 .r80Result resw 5
1820 .FSW resw 1
1821endstruc
1822
1823
1824;;
1825; Need to move this as well somewhere better?
1826;
1827struc IEMFPURESULTTWO
1828 .r80Result1 resw 5
1829 .FSW resw 1
1830 .r80Result2 resw 5
1831endstruc
1832
1833
1834;
1835;---------------------- 16-bit signed integer operations ----------------------
1836;
1837
1838
1839;;
1840; Converts a 16-bit signed integer to an 80-bit floating point value (fpu register).
1841;
1842; @param A0 FPU context (fxsave).
1843; @param A1 Pointer to a IEMFPURESULT for the output.
1844; @param A2 Pointer to the 16-bit signed integer value to convert.
1845;
1846BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
1847 PROLOGUE_3_ARGS
1848 sub xSP, 20h
1849
1850 fninit
1851 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1852 fild word [A2]
1853
1854 fnstsw word [A1 + IEMFPURESULT.FSW]
1855 fnclex
1856 fstp tword [A1 + IEMFPURESULT.r80Result]
1857
1858 fninit
1859 add xSP, 20h
1860 EPILOGUE_3_ARGS
1861ENDPROC iemAImpl_fild_i16_to_r80
1862
1863
1864;;
1865; Store an 80-bit floating point value (register) as a 16-bit signed integer (memory).
1866;
1867; @param A0 FPU context (fxsave).
1868; @param A1 Where to return the output FSW.
1869; @param A2 Where to store the 16-bit signed integer value.
1870; @param A3 Pointer to the 80-bit value.
1871;
1872BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
1873 PROLOGUE_4_ARGS
1874 sub xSP, 20h
1875
1876 fninit
1877 fld tword [A3]
1878 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1879 fistp word [A2]
1880
1881 fnstsw word [A1]
1882
1883 fninit
1884 add xSP, 20h
1885 EPILOGUE_4_ARGS
1886ENDPROC iemAImpl_fist_r80_to_i16
1887
1888
1889;;
1890; Store an 80-bit floating point value (register) as a 16-bit signed integer
1891; (memory) with truncation.
1892;
1893; @param A0 FPU context (fxsave).
1894; @param A1 Where to return the output FSW.
1895; @param A2 Where to store the 16-bit signed integer value.
1896; @param A3 Pointer to the 80-bit value.
1897;
1898BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
1899 PROLOGUE_4_ARGS
1900 sub xSP, 20h
1901
1902 fninit
1903 fld tword [A3]
1904 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1905 fisttp word [A2]
1906
1907 fnstsw word [A1]
1908
1909 fninit
1910 add xSP, 20h
1911 EPILOGUE_4_ARGS
1912ENDPROC iemAImpl_fistt_r80_to_i16
1913
1914
1915;;
1916; FPU instruction working on one 80-bit and one 16-bit signed integer value.
1917;
1918; @param 1 The instruction
1919;
1920; @param A0 FPU context (fxsave).
1921; @param A1 Pointer to a IEMFPURESULT for the output.
1922; @param A2 Pointer to the 80-bit value.
1923; @param A3 Pointer to the 16-bit value.
1924;
1925%macro IEMIMPL_FPU_R80_BY_I16 1
1926BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1927 PROLOGUE_4_ARGS
1928 sub xSP, 20h
1929
1930 fninit
1931 fld tword [A2]
1932 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1933 %1 word [A3]
1934
1935 fnstsw word [A1 + IEMFPURESULT.FSW]
1936 fnclex
1937 fstp tword [A1 + IEMFPURESULT.r80Result]
1938
1939 fninit
1940 add xSP, 20h
1941 EPILOGUE_4_ARGS
1942ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1943%endmacro
1944
1945IEMIMPL_FPU_R80_BY_I16 fiadd
1946IEMIMPL_FPU_R80_BY_I16 fimul
1947IEMIMPL_FPU_R80_BY_I16 fisub
1948IEMIMPL_FPU_R80_BY_I16 fisubr
1949IEMIMPL_FPU_R80_BY_I16 fidiv
1950IEMIMPL_FPU_R80_BY_I16 fidivr
1951
1952
1953;;
1954; FPU instruction working on one 80-bit and one 16-bit signed integer value,
1955; only returning FSW.
1956;
1957; @param 1 The instruction
1958;
1959; @param A0 FPU context (fxsave).
1960; @param A1 Where to store the output FSW.
1961; @param A2 Pointer to the 80-bit value.
1962; @param A3 Pointer to the 16-bit value.
1963;
1964%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
1965BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1966 PROLOGUE_4_ARGS
1967 sub xSP, 20h
1968
1969 fninit
1970 fld tword [A2]
1971 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1972 %1 word [A3]
1973
1974 fnstsw word [A1]
1975
1976 fninit
1977 add xSP, 20h
1978 EPILOGUE_4_ARGS
1979ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1980%endmacro
1981
1982IEMIMPL_FPU_R80_BY_I16_FSW ficom
1983
1984
1985
1986;
1987;---------------------- 32-bit signed integer operations ----------------------
1988;
1989
1990
1991;;
1992; Converts a 32-bit signed integer to an 80-bit floating point value (fpu register).
1993;
1994; @param A0 FPU context (fxsave).
1995; @param A1 Pointer to a IEMFPURESULT for the output.
1996; @param A2 Pointer to the 32-bit signed integer value to convert.
1997;
1998BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
1999 PROLOGUE_3_ARGS
2000 sub xSP, 20h
2001
2002 fninit
2003 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2004 fild dword [A2]
2005
2006 fnstsw word [A1 + IEMFPURESULT.FSW]
2007 fnclex
2008 fstp tword [A1 + IEMFPURESULT.r80Result]
2009
2010 fninit
2011 add xSP, 20h
2012 EPILOGUE_3_ARGS
2013ENDPROC iemAImpl_fild_i32_to_r80
2014
2015
2016;;
2017; Store an 80-bit floating point value (register) as a 32-bit signed integer (memory).
2018;
2019; @param A0 FPU context (fxsave).
2020; @param A1 Where to return the output FSW.
2021; @param A2 Where to store the 32-bit signed integer value.
2022; @param A3 Pointer to the 80-bit value.
2023;
2024BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
2025 PROLOGUE_4_ARGS
2026 sub xSP, 20h
2027
2028 fninit
2029 fld tword [A3]
2030 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2031 fistp dword [A2]
2032
2033 fnstsw word [A1]
2034
2035 fninit
2036 add xSP, 20h
2037 EPILOGUE_4_ARGS
2038ENDPROC iemAImpl_fist_r80_to_i32
2039
2040
2041;;
2042; Store an 80-bit floating point value (register) as a 32-bit signed integer
2043; (memory) with truncation.
2044;
2045; @param A0 FPU context (fxsave).
2046; @param A1 Where to return the output FSW.
2047; @param A2 Where to store the 32-bit signed integer value.
2048; @param A3 Pointer to the 80-bit value.
2049;
2050BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
2051 PROLOGUE_4_ARGS
2052 sub xSP, 20h
2053
2054 fninit
2055 fld tword [A3]
2056 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2057 fisttp dword [A2]
2058
2059 fnstsw word [A1]
2060
2061 fninit
2062 add xSP, 20h
2063 EPILOGUE_4_ARGS
2064ENDPROC iemAImpl_fistt_r80_to_i32
2065
2066
2067;;
2068; FPU instruction working on one 80-bit and one 32-bit signed integer value.
2069;
2070; @param 1 The instruction
2071;
2072; @param A0 FPU context (fxsave).
2073; @param A1 Pointer to a IEMFPURESULT for the output.
2074; @param A2 Pointer to the 80-bit value.
2075; @param A3 Pointer to the 32-bit value.
2076;
2077%macro IEMIMPL_FPU_R80_BY_I32 1
2078BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2079 PROLOGUE_4_ARGS
2080 sub xSP, 20h
2081
2082 fninit
2083 fld tword [A2]
2084 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2085 %1 dword [A3]
2086
2087 fnstsw word [A1 + IEMFPURESULT.FSW]
2088 fnclex
2089 fstp tword [A1 + IEMFPURESULT.r80Result]
2090
2091 fninit
2092 add xSP, 20h
2093 EPILOGUE_4_ARGS
2094ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2095%endmacro
2096
2097IEMIMPL_FPU_R80_BY_I32 fiadd
2098IEMIMPL_FPU_R80_BY_I32 fimul
2099IEMIMPL_FPU_R80_BY_I32 fisub
2100IEMIMPL_FPU_R80_BY_I32 fisubr
2101IEMIMPL_FPU_R80_BY_I32 fidiv
2102IEMIMPL_FPU_R80_BY_I32 fidivr
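;
; For reference, each instantiation above emits one helper following the
; iemAImpl_<instr>_r80_by_i32 naming scheme, e.g. iemAImpl_fiadd_r80_by_i32 for
; the first one; the body is just the macro text with %1 replaced by the
; mnemonic, so the fiadd variant essentially executes (sketch of the expansion,
; not assembled here):
;
;       fld     tword [A2]              ; 80-bit operand -> ST0
;       fiadd   dword [A3]              ; ST0 += 32-bit signed integer
;       fstp    tword [A1 + IEMFPURESULT.r80Result]
;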
2103
2104
2105;;
2106; FPU instruction working on one 80-bit and one 32-bit signed integer value,
2107; only returning FSW.
2108;
2109; @param 1 The instruction
2110;
2111; @param A0 FPU context (fxsave).
2112; @param A1 Where to store the output FSW.
2113; @param A2 Pointer to the 80-bit value.
2114; @param A3 Pointer to the 32-bit value.
2115;
2116%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
2117BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2118 PROLOGUE_4_ARGS
2119 sub xSP, 20h
2120
2121 fninit
2122 fld tword [A2]
2123 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2124 %1 dword [A3]
2125
2126 fnstsw word [A1]
2127
2128 fninit
2129 add xSP, 20h
2130 EPILOGUE_4_ARGS
2131ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2132%endmacro
2133
2134IEMIMPL_FPU_R80_BY_I32_FSW ficom
2135
2136
2137
2138;
2139;---------------------- 64-bit signed integer operations ----------------------
2140;
2141
2142
2143;;
2144; Converts a 64-bit signed integer value to an 80-bit floating point one (fpu register).
2145;
2146; @param A0 FPU context (fxsave).
2147; @param A1 Pointer to a IEMFPURESULT for the output.
2148; @param A2 Pointer to the 64-bit signed integer value to convert.
2149;
2150BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
2151 PROLOGUE_3_ARGS
2152 sub xSP, 20h
2153
2154 fninit
2155 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2156 fild qword [A2]
2157
2158 fnstsw word [A1 + IEMFPURESULT.FSW]
2159 fnclex
2160 fstp tword [A1 + IEMFPURESULT.r80Result]
2161
2162 fninit
2163 add xSP, 20h
2164 EPILOGUE_3_ARGS
2165ENDPROC iemAImpl_fild_i64_to_r80
2166
2167
2168;;
2169; Store an 80-bit floating point value (register) as a 64-bit signed integer (memory).
2170;
2171; @param A0 FPU context (fxsave).
2172; @param A1 Where to return the output FSW.
2173; @param A2 Where to store the 64-bit signed integer value.
2174; @param A3 Pointer to the 80-bit value.
2175;
2176BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
2177 PROLOGUE_4_ARGS
2178 sub xSP, 20h
2179
2180 fninit
2181 fld tword [A3]
2182 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2183 fistp qword [A2]
2184
2185 fnstsw word [A1]
2186
2187 fninit
2188 add xSP, 20h
2189 EPILOGUE_4_ARGS
2190ENDPROC iemAImpl_fist_r80_to_i64
2191
2192
2193;;
2194; Store an 80-bit floating point value (register) as a 64-bit signed integer
2195; (memory) with truncation.
2196;
2197; @param A0 FPU context (fxsave).
2198; @param A1 Where to return the output FSW.
2199; @param A2 Where to store the 64-bit signed integer value.
2200; @param A3 Pointer to the 80-bit value.
2201;
2202BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
2203 PROLOGUE_4_ARGS
2204 sub xSP, 20h
2205
2206 fninit
2207 fld tword [A3]
2208 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2209 fisttp qword [A2]
2210
2211 fnstsw word [A1]
2212
2213 fninit
2214 add xSP, 20h
2215 EPILOGUE_4_ARGS
2216ENDPROC iemAImpl_fistt_r80_to_i64
2217
2218
2219
2220;
2221;---------------------- 32-bit floating point operations ----------------------
2222;
2223
2224;;
2225; Converts a 32-bit floating point value to an 80-bit one (fpu register).
2226;
2227; @param A0 FPU context (fxsave).
2228; @param A1 Pointer to a IEMFPURESULT for the output.
2229; @param A2 Pointer to the 32-bit floating point value to convert.
2230;
2231BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
2232 PROLOGUE_3_ARGS
2233 sub xSP, 20h
2234
2235 fninit
2236 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2237 fld dword [A2]
2238
2239 fnstsw word [A1 + IEMFPURESULT.FSW]
2240 fnclex
2241 fstp tword [A1 + IEMFPURESULT.r80Result]
2242
2243 fninit
2244 add xSP, 20h
2245 EPILOGUE_3_ARGS
2246ENDPROC iemAImpl_fld_r32_to_r80
2247
2248
2249;;
2250; Store an 80-bit floating point value (register) as a 32-bit one (memory).
2251;
2252; @param A0 FPU context (fxsave).
2253; @param A1 Where to return the output FSW.
2254; @param A2 Where to store the 32-bit value.
2255; @param A3 Pointer to the 80-bit value.
2256;
2257BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
2258 PROLOGUE_4_ARGS
2259 sub xSP, 20h
2260
2261 fninit
2262 fld tword [A3]
2263 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2264 fst dword [A2]
2265
2266 fnstsw word [A1]
2267
2268 fninit
2269 add xSP, 20h
2270 EPILOGUE_4_ARGS
2271ENDPROC iemAImpl_fst_r80_to_r32
2272
2273
2274;;
2275; FPU instruction working on one 80-bit and one 32-bit floating point value.
2276;
2277; @param 1 The instruction
2278;
2279; @param A0 FPU context (fxsave).
2280; @param A1 Pointer to a IEMFPURESULT for the output.
2281; @param A2 Pointer to the 80-bit value.
2282; @param A3 Pointer to the 32-bit value.
2283;
2284%macro IEMIMPL_FPU_R80_BY_R32 1
2285BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2286 PROLOGUE_4_ARGS
2287 sub xSP, 20h
2288
2289 fninit
2290 fld tword [A2]
2291 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2292 %1 dword [A3]
2293
2294 fnstsw word [A1 + IEMFPURESULT.FSW]
2295 fnclex
2296 fstp tword [A1 + IEMFPURESULT.r80Result]
2297
2298 fninit
2299 add xSP, 20h
2300 EPILOGUE_4_ARGS
2301ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2302%endmacro
2303
2304IEMIMPL_FPU_R80_BY_R32 fadd
2305IEMIMPL_FPU_R80_BY_R32 fmul
2306IEMIMPL_FPU_R80_BY_R32 fsub
2307IEMIMPL_FPU_R80_BY_R32 fsubr
2308IEMIMPL_FPU_R80_BY_R32 fdiv
2309IEMIMPL_FPU_R80_BY_R32 fdivr
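;
; Note for the group above: the 32-bit operand is not explicitly widened; the
; x87 instruction itself converts the memory operand to double extended
; precision before operating on ST0.  Illustrative expansion for the fmul case
; (sketch only, not assembled here):
;
;       fld     tword [A2]              ; ST0 = 80-bit operand
;       fmul    dword [A3]              ; ST0 *= 32-bit float from memory
;       fstp    tword [A1 + IEMFPURESULT.r80Result]
;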
2310
2311
2312;;
2313; FPU instruction working on one 80-bit and one 32-bit floating point value,
2314; only returning FSW.
2315;
2316; @param 1 The instruction
2317;
2318; @param A0 FPU context (fxsave).
2319; @param A1 Where to store the output FSW.
2320; @param A2 Pointer to the 80-bit value.
2321; @param A3 Pointer to the 32-bit value.
2322;
2323%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
2324BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2325 PROLOGUE_4_ARGS
2326 sub xSP, 20h
2327
2328 fninit
2329 fld tword [A2]
2330 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2331 %1 dword [A3]
2332
2333 fnstsw word [A1]
2334
2335 fninit
2336 add xSP, 20h
2337 EPILOGUE_4_ARGS
2338ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2339%endmacro
2340
2341IEMIMPL_FPU_R80_BY_R32_FSW fcom
2342
2343
2344
2345;
2346;---------------------- 64-bit floating point operations ----------------------
2347;
2348
2349;;
2350; Converts a 64-bit floating point value to an 80-bit one (fpu register).
2351;
2352; @param A0 FPU context (fxsave).
2353; @param A1 Pointer to a IEMFPURESULT for the output.
2354; @param A2 Pointer to the 64-bit floating point value to convert.
2355;
2356BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
2357 PROLOGUE_3_ARGS
2358 sub xSP, 20h
2359
     fninit
2360 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2361 fld qword [A2]
2362
2363 fnstsw word [A1 + IEMFPURESULT.FSW]
2364 fnclex
2365 fstp tword [A1 + IEMFPURESULT.r80Result]
2366
2367 fninit
2368 add xSP, 20h
2369 EPILOGUE_3_ARGS
2370ENDPROC iemAImpl_fld_r64_to_r80
2371
2372
2373;;
2374; Store an 80-bit floating point value (register) as a 64-bit one (memory).
2375;
2376; @param A0 FPU context (fxsave).
2377; @param A1 Where to return the output FSW.
2378; @param A2 Where to store the 64-bit value.
2379; @param A3 Pointer to the 80-bit value.
2380;
2381BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
2382 PROLOGUE_4_ARGS
2383 sub xSP, 20h
2384
2385 fninit
2386 fld tword [A3]
2387 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2388 fst qword [A2]
2389
2390 fnstsw word [A1]
2391
2392 fninit
2393 add xSP, 20h
2394 EPILOGUE_4_ARGS
2395ENDPROC iemAImpl_fst_r80_to_r64
2396
2397
2398;;
2399; FPU instruction working on one 80-bit and one 64-bit floating point value.
2400;
2401; @param 1 The instruction
2402;
2403; @param A0 FPU context (fxsave).
2404; @param A1 Pointer to a IEMFPURESULT for the output.
2405; @param A2 Pointer to the 80-bit value.
2406; @param A3 Pointer to the 64-bit value.
2407;
2408%macro IEMIMPL_FPU_R80_BY_R64 1
2409BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2410 PROLOGUE_4_ARGS
2411 sub xSP, 20h
2412
2413 fninit
2414 fld tword [A2]
2415 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2416 %1 qword [A3]
2417
2418 fnstsw word [A1 + IEMFPURESULT.FSW]
2419 fnclex
2420 fstp tword [A1 + IEMFPURESULT.r80Result]
2421
2422 fninit
2423 add xSP, 20h
2424 EPILOGUE_4_ARGS
2425ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2426%endmacro
2427
2428IEMIMPL_FPU_R80_BY_R64 fadd
2429IEMIMPL_FPU_R80_BY_R64 fmul
2430IEMIMPL_FPU_R80_BY_R64 fsub
2431IEMIMPL_FPU_R80_BY_R64 fsubr
2432IEMIMPL_FPU_R80_BY_R64 fdiv
2433IEMIMPL_FPU_R80_BY_R64 fdivr
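;
; Illustrative C-side shape shared by the six helpers above (a sketch; the
; parameter type names are assumptions, see the IEM headers for the real
; prototypes):
;
;   void iemAImpl_fadd_r80_by_r64(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
;                                 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2);
;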
2434
2435;;
2436; FPU instruction working on one 80-bit and one 64-bit floating point value,
2437; only returning FSW.
2438;
2439; @param 1 The instruction
2440;
2441; @param A0 FPU context (fxsave).
2442; @param A1 Where to store the output FSW.
2443; @param A2 Pointer to the 80-bit value.
2444; @param A3 Pointer to the 64-bit value.
2445;
2446%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
2447BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2448 PROLOGUE_4_ARGS
2449 sub xSP, 20h
2450
2451 fninit
2452 fld tword [A2]
2453 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2454 %1 qword [A3]
2455
2456 fnstsw word [A1]
2457
2458 fninit
2459 add xSP, 20h
2460 EPILOGUE_4_ARGS
2461ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2462%endmacro
2463
2464IEMIMPL_FPU_R80_BY_R64_FSW fcom
2465
2466
2467
2468;
2469;---------------------- 80-bit floating point operations ----------------------
2470;
2471
2472;;
2473; Loads an 80-bit floating point register value from memory.
2474;
2475; @param A0 FPU context (fxsave).
2476; @param A1 Pointer to a IEMFPURESULT for the output.
2477; @param A2 Pointer to the 80-bit floating point value to load.
2478;
2479BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2480 PROLOGUE_3_ARGS
2481 sub xSP, 20h
2482
2483 fninit
2484 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2485 fld tword [A2]
2486
2487 fnstsw word [A1 + IEMFPURESULT.FSW]
2488 fnclex
2489 fstp tword [A1 + IEMFPURESULT.r80Result]
2490
2491 fninit
2492 add xSP, 20h
2493 EPILOGUE_3_ARGS
2494ENDPROC iemAImpl_fld_r80_from_r80
2495
2496
2497;;
2498; Store an 80-bit floating point register to memory.
2499;
2500; @param A0 FPU context (fxsave).
2501; @param A1 Where to return the output FSW.
2502; @param A2 Where to store the 80-bit value.
2503; @param A3 Pointer to the 80-bit register value.
2504;
2505BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2506 PROLOGUE_4_ARGS
2507 sub xSP, 20h
2508
2509 fninit
2510 fld tword [A3]
2511 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2512 fstp tword [A2]
2513
2514 fnstsw word [A1]
2515
2516 fninit
2517 add xSP, 20h
2518 EPILOGUE_4_ARGS
2519ENDPROC iemAImpl_fst_r80_to_r80
2520
2521
2522;;
2523; Loads an 80-bit floating point register value in BCD format from memory.
2524;
2525; @param A0 FPU context (fxsave).
2526; @param A1 Pointer to a IEMFPURESULT for the output.
2527; @param A2 Pointer to the 80-bit BCD value to load.
2528;
2529BEGINPROC_FASTCALL iemAImpl_fld_r80_from_d80, 12
2530 PROLOGUE_3_ARGS
2531 sub xSP, 20h
2532
2533 fninit
2534 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2535 fbld tword [A2]
2536
2537 fnstsw word [A1 + IEMFPURESULT.FSW]
2538 fnclex
2539 fstp tword [A1 + IEMFPURESULT.r80Result]
2540
2541 fninit
2542 add xSP, 20h
2543 EPILOGUE_3_ARGS
2544ENDPROC iemAImpl_fld_r80_from_d80
2545
2546
2547;;
2548; Store an 80-bit floating point register to memory as BCD.
2549;
2550; @param A0 FPU context (fxsave).
2551; @param A1 Where to return the output FSW.
2552; @param A2 Where to store the 80-bit BCD value.
2553; @param A3 Pointer to the 80-bit register value.
2554;
2555BEGINPROC_FASTCALL iemAImpl_fst_r80_to_d80, 16
2556 PROLOGUE_4_ARGS
2557 sub xSP, 20h
2558
2559 fninit
2560 fld tword [A3]
2561 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2562 fbstp tword [A2]
2563
2564 fnstsw word [A1]
2565
2566 fninit
2567 add xSP, 20h
2568 EPILOGUE_4_ARGS
2569ENDPROC iemAImpl_fst_r80_to_d80
2570
2571
2572;;
2573; FPU instruction working on two 80-bit floating point values.
2574;
2575; @param 1 The instruction
2576;
2577; @param A0 FPU context (fxsave).
2578; @param A1 Pointer to a IEMFPURESULT for the output.
2579; @param A2 Pointer to the first 80-bit value (ST0)
2580; @param A3 Pointer to the second 80-bit value (STn).
2581;
2582%macro IEMIMPL_FPU_R80_BY_R80 2
2583BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2584 PROLOGUE_4_ARGS
2585 sub xSP, 20h
2586
2587 fninit
2588 fld tword [A3]
2589 fld tword [A2]
2590 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2591 %1 %2
2592
2593 fnstsw word [A1 + IEMFPURESULT.FSW]
2594 fnclex
2595 fstp tword [A1 + IEMFPURESULT.r80Result]
2596
2597 fninit
2598 add xSP, 20h
2599 EPILOGUE_4_ARGS
2600ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2601%endmacro
2602
2603IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2604IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2605IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2606IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2607IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2608IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2609IEMIMPL_FPU_R80_BY_R80 fprem, {}
2610IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2611IEMIMPL_FPU_R80_BY_R80 fscale, {}
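;
; The second macro argument above supplies the explicit register operands: for
; the arithmetic instructions '%1 %2' expands to e.g. 'fadd st0, st1', while
; fprem, fprem1 and fscale take an empty brace pair because they implicitly
; operate on ST0 and ST1 and encode no operands at all.
;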
2612
2613
2614;;
2615; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2616; storing the result in ST1 and popping the stack.
2617;
2618; @param 1 The instruction
2619;
2620; @param A0 FPU context (fxsave).
2621; @param A1 Pointer to a IEMFPURESULT for the output.
2622; @param A2 Pointer to the first 80-bit value (ST1).
2623; @param A3 Pointer to the second 80-bit value (ST0).
2624;
2625%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2626BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2627 PROLOGUE_4_ARGS
2628 sub xSP, 20h
2629
2630 fninit
2631 fld tword [A2]
2632 fld tword [A3]
2633 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2634 %1
2635
2636 fnstsw word [A1 + IEMFPURESULT.FSW]
2637 fnclex
2638 fstp tword [A1 + IEMFPURESULT.r80Result]
2639
2640 fninit
2641 add xSP, 20h
2642 EPILOGUE_4_ARGS
2643ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2644%endmacro
2645
2646IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
2647IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
2648IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
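;
; Load order note for the macro above: the value destined for ST1 (A2) is
; loaded first and the ST0 value (A3) second, so the guest stack layout is
; reproduced before e.g. 'fpatan' runs.  These instructions store into ST1 and
; pop, leaving the result on top of the stack, which is what the single fstp
; writes back to IEMFPURESULT.r80Result.
;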
2649
2650
2651;;
2652; FPU instruction working on two 80-bit floating point values, only
2653; returning FSW.
2654;
2655; @param 1 The instruction
2656;
2657; @param A0 FPU context (fxsave).
2658; @param A1 Pointer to a uint16_t for the resulting FSW.
2659; @param A2 Pointer to the first 80-bit value.
2660; @param A3 Pointer to the second 80-bit value.
2661;
2662%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2663BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2664 PROLOGUE_4_ARGS
2665 sub xSP, 20h
2666
2667 fninit
2668 fld tword [A3]
2669 fld tword [A2]
2670 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2671 %1 st0, st1
2672
2673 fnstsw word [A1]
2674
2675 fninit
2676 add xSP, 20h
2677 EPILOGUE_4_ARGS
2678ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2679%endmacro
2680
2681IEMIMPL_FPU_R80_BY_R80_FSW fcom
2682IEMIMPL_FPU_R80_BY_R80_FSW fucom
2683
2684
2685;;
2686; FPU instruction working on two 80-bit floating point values,
2687; returning FSW and EFLAGS (eax).
2688;
2689; @param 1 The instruction
2690;
2691; @returns EFLAGS in EAX.
2692; @param A0 FPU context (fxsave).
2693; @param A1 Pointer to a uint16_t for the resulting FSW.
2694; @param A2 Pointer to the first 80-bit value.
2695; @param A3 Pointer to the second 80-bit value.
2696;
2697%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2698BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2699 PROLOGUE_4_ARGS
2700 sub xSP, 20h
2701
2702 fninit
2703 fld tword [A3]
2704 fld tword [A2]
2705 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2706 %1 st1
2707
2708 fnstsw word [A1]
2709 pushf
2710 pop xAX
2711
2712 fninit
2713 add xSP, 20h
2714 EPILOGUE_4_ARGS
2715ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2716%endmacro
2717
2718IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2719IEMIMPL_FPU_R80_BY_R80_EFL fucomi
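;
; fcomi/fucomi set ZF/PF/CF directly, so the macro above captures the flags
; with pushf/pop and hands them back in xAX.  Rough C-side shape (a sketch;
; the parameter type names are assumptions):
;
;   uint32_t iemAImpl_fcomi_r80_by_r80(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
;                                      PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2);
;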
2720
2721
2722;;
2723; FPU instruction working on one 80-bit floating point value.
2724;
2725; @param 1 The instruction
2726;
2727; @param A0 FPU context (fxsave).
2728; @param A1 Pointer to a IEMFPURESULT for the output.
2729; @param A2 Pointer to the 80-bit value.
2730;
2731%macro IEMIMPL_FPU_R80 1
2732BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2733 PROLOGUE_3_ARGS
2734 sub xSP, 20h
2735
2736 fninit
2737 fld tword [A2]
2738 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2739 %1
2740
2741 fnstsw word [A1 + IEMFPURESULT.FSW]
2742 fnclex
2743 fstp tword [A1 + IEMFPURESULT.r80Result]
2744
2745 fninit
2746 add xSP, 20h
2747 EPILOGUE_3_ARGS
2748ENDPROC iemAImpl_ %+ %1 %+ _r80
2749%endmacro
2750
2751IEMIMPL_FPU_R80 fchs
2752IEMIMPL_FPU_R80 fabs
2753IEMIMPL_FPU_R80 f2xm1
2754IEMIMPL_FPU_R80 fsqrt
2755IEMIMPL_FPU_R80 frndint
2756IEMIMPL_FPU_R80 fsin
2757IEMIMPL_FPU_R80 fcos
2758
2759
2760;;
2761; FPU instruction working on one 80-bit floating point value, only
2762; returning FSW.
2763;
2764; @param 1 The instruction
2765;
2766; @param A0 FPU context (fxsave).
2767; @param A1 Pointer to a uint16_t for the resulting FSW.
2768; @param A2 Pointer to the 80-bit value.
2769;
2770%macro IEMIMPL_FPU_R80_FSW 1
2771BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2772 PROLOGUE_3_ARGS
2773 sub xSP, 20h
2774
2775 fninit
2776 fld tword [A2]
2777 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2778 %1
2779
2780 fnstsw word [A1]
2781
2782 fninit
2783 add xSP, 20h
2784 EPILOGUE_3_ARGS
2785ENDPROC iemAImpl_ %+ %1 %+ _r80
2786%endmacro
2787
2788IEMIMPL_FPU_R80_FSW ftst
2789IEMIMPL_FPU_R80_FSW fxam
2790
2791
2792
2793;;
2794; FPU instruction loading an 80-bit floating point constant.
2795;
2796; @param 1 The instruction
2797;
2798; @param A0 FPU context (fxsave).
2799; @param A1 Pointer to a IEMFPURESULT for the output.
2800;
2801%macro IEMIMPL_FPU_R80_CONST 1
2802BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2803 PROLOGUE_2_ARGS
2804 sub xSP, 20h
2805
2806 fninit
2807 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2808 %1
2809
2810 fnstsw word [A1 + IEMFPURESULT.FSW]
2811 fnclex
2812 fstp tword [A1 + IEMFPURESULT.r80Result]
2813
2814 fninit
2815 add xSP, 20h
2816 EPILOGUE_2_ARGS
2817ENDPROC iemAImpl_ %+ %1
2818%endmacro
2819
2820IEMIMPL_FPU_R80_CONST fld1
2821IEMIMPL_FPU_R80_CONST fldl2t
2822IEMIMPL_FPU_R80_CONST fldl2e
2823IEMIMPL_FPU_R80_CONST fldpi
2824IEMIMPL_FPU_R80_CONST fldlg2
2825IEMIMPL_FPU_R80_CONST fldln2
2826IEMIMPL_FPU_R80_CONST fldz
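;
; Each line above generates one constant-loading helper, e.g. iemAImpl_fldpi.
; Rough C-side shape (a sketch; type names are assumptions):
;
;   void iemAImpl_fldpi(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes);
;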
2827
2828
2829;;
2830; FPU instruction working on one 80-bit floating point value, outputting two.
2831;
2832; @param 1 The instruction
2833;
2834; @param A0 FPU context (fxsave).
2835; @param A1 Pointer to a IEMFPURESULTTWO for the output.
2836; @param A2 Pointer to the 80-bit value.
2837;
2838%macro IEMIMPL_FPU_R80_R80 1
2839BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2840 PROLOGUE_3_ARGS
2841 sub xSP, 20h
2842
2843 fninit
2844 fld tword [A2]
2845 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2846 %1
2847
2848 fnstsw word [A1 + IEMFPURESULTTWO.FSW]
2849 fnclex
2850 fstp tword [A1 + IEMFPURESULTTWO.r80Result2]
2851 fnclex
2852 fstp tword [A1 + IEMFPURESULTTWO.r80Result1]
2853
2854 fninit
2855 add xSP, 20h
2856 EPILOGUE_3_ARGS
2857ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2858%endmacro
2859
2860IEMIMPL_FPU_R80_R80 fptan
2861IEMIMPL_FPU_R80_R80 fxtract
2862IEMIMPL_FPU_R80_R80 fsincos
2863
2864
2865
2866
2867;---------------------- SSE and MMX Operations ----------------------
2868
2869;; @todo what do we need to do for MMX?
2870%macro IEMIMPL_MMX_PROLOGUE 0
2871%endmacro
2872%macro IEMIMPL_MMX_EPILOGUE 0
2873%endmacro
2874
2875;; @todo what do we need to do for SSE?
2876%macro IEMIMPL_SSE_PROLOGUE 0
2877%endmacro
2878%macro IEMIMPL_SSE_EPILOGUE 0
2879%endmacro
2880
2881
2882;;
2883; Media instruction working on two full sized registers.
2884;
2885; @param 1 The instruction
2886;
2887; @param A0 FPU context (fxsave).
2888; @param A1 Pointer to the first media register size operand (input/output).
2889; @param A2 Pointer to the second media register size operand (input).
2890;
2891%macro IEMIMPL_MEDIA_F2 1
2892BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2893 PROLOGUE_3_ARGS
2894 IEMIMPL_MMX_PROLOGUE
2895
2896 movq mm0, [A1]
2897 movq mm1, [A2]
2898 %1 mm0, mm1
2899 movq [A1], mm0
2900
2901 IEMIMPL_MMX_EPILOGUE
2902 EPILOGUE_3_ARGS
2903ENDPROC iemAImpl_ %+ %1 %+ _u64
2904
2905BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2906 PROLOGUE_3_ARGS
2907 IEMIMPL_SSE_PROLOGUE
2908
2909 movdqu xmm0, [A1]
2910 movdqu xmm1, [A2]
2911 %1 xmm0, xmm1
2912 movdqu [A1], xmm0
2913
2914 IEMIMPL_SSE_EPILOGUE
2915 EPILOGUE_3_ARGS
2916ENDPROC iemAImpl_ %+ %1 %+ _u128
2917%endmacro
2918
2919IEMIMPL_MEDIA_F2 pxor
2920IEMIMPL_MEDIA_F2 pcmpeqb
2921IEMIMPL_MEDIA_F2 pcmpeqw
2922IEMIMPL_MEDIA_F2 pcmpeqd
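;
; Each instantiation above emits a pair of helpers, e.g. iemAImpl_pxor_u64 for
; the MMX form and iemAImpl_pxor_u128 for the SSE form.  Rough C-side shape
; (a sketch; the parameter type names are assumptions):
;
;   void iemAImpl_pxor_u64(PCX86FXSTATE pFpuState, uint64_t *puDst, uint64_t const *puSrc);
;   void iemAImpl_pxor_u128(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc);
;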
2923
2924
2925;;
2926; Media instruction working on one full sized and one half sized register (lower half).
2927;
2928; @param 1 The instruction
2929; @param 2 1 if MMX is included, 0 if not.
2930;
2931; @param A0 FPU context (fxsave).
2932; @param A1 Pointer to the first full sized media register operand (input/output).
2933; @param A2 Pointer to the second half sized media register operand (input).
2934;
2935%macro IEMIMPL_MEDIA_F1L1 2
2936 %if %2 != 0
2937BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2938 PROLOGUE_3_ARGS
2939 IEMIMPL_MMX_PROLOGUE
2940
2941 movq mm0, [A1]
2942 movd mm1, [A2]
2943 %1 mm0, mm1
2944 movq [A1], mm0
2945
2946 IEMIMPL_MMX_EPILOGUE
2947 EPILOGUE_3_ARGS
2948ENDPROC iemAImpl_ %+ %1 %+ _u64
2949 %endif
2950
2951BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2952 PROLOGUE_3_ARGS
2953 IEMIMPL_SSE_PROLOGUE
2954
2955 movdqu xmm0, [A1]
2956 movq xmm1, [A2]
2957 %1 xmm0, xmm1
2958 movdqu [A1], xmm0
2959
2960 IEMIMPL_SSE_EPILOGUE
2961 EPILOGUE_3_ARGS
2962ENDPROC iemAImpl_ %+ %1 %+ _u128
2963%endmacro
2964
2965IEMIMPL_MEDIA_F1L1 punpcklbw, 1
2966IEMIMPL_MEDIA_F1L1 punpcklwd, 1
2967IEMIMPL_MEDIA_F1L1 punpckldq, 1
2968IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2969
2970
2971;;
2972; Media instruction working on one full sized and one half sized register (high half).
2973;
2974; @param 1 The instruction
2975; @param 2 1 if MMX is included, 0 if not.
2976;
2977; @param A0 FPU context (fxsave).
2978; @param A1 Pointer to the first full sized media register operand (input/output).
2979; @param A2 Pointer to the second full sized media register operand, where we
2980; will only use the upper half (input).
2981;
2982%macro IEMIMPL_MEDIA_F1H1 2
2983 %if %2 != 0
2984BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2985 PROLOGUE_3_ARGS
2986 IEMIMPL_MMX_PROLOGUE
2987
2988 movq mm0, [A1]
2989 movq mm1, [A2]
2990 %1 mm0, mm1
2991 movq [A1], mm0
2992
2993 IEMIMPL_MMX_EPILOGUE
2994 EPILOGUE_3_ARGS
2995ENDPROC iemAImpl_ %+ %1 %+ _u64
2996 %endif
2997
2998BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2999 PROLOGUE_3_ARGS
3000 IEMIMPL_SSE_PROLOGUE
3001
3002 movdqu xmm0, [A1]
3003 movdqu xmm1, [A2]
3004 %1 xmm0, xmm1
3005 movdqu [A1], xmm0
3006
3007 IEMIMPL_SSE_EPILOGUE
3008 EPILOGUE_3_ARGS
3009ENDPROC iemAImpl_ %+ %1 %+ _u128
3010%endmacro
3011
3012IEMIMPL_MEDIA_F1H1 punpckhbw, 1
3013IEMIMPL_MEDIA_F1H1 punpckhwd, 1
3014IEMIMPL_MEDIA_F1H1 punpckhdq, 1
3015IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
3016
3017
3018;
3019; Shufflers with evil 8-bit immediates.
3020;
3021
3022BEGINPROC_FASTCALL iemAImpl_pshufw, 16
3023 PROLOGUE_4_ARGS
3024 IEMIMPL_MMX_PROLOGUE
3025
3026 movq mm0, [A1]
3027 movq mm1, [A2]
3028 lea T0, [A3 + A3*4] ; sizeof(pshufw+ret) == 5
3029 lea T1, [.imm0 xWrtRIP]
3030 lea T1, [T1 + T0]
3031 call T1
3032 movq [A1], mm0
3033
3034 IEMIMPL_MMX_EPILOGUE
3035 EPILOGUE_4_ARGS
3036%assign bImm 0
3037%rep 256
3038.imm %+ bImm:
3039 pshufw mm0, mm1, bImm
3040 ret
3041 %assign bImm bImm + 1
3042%endrep
3043.immEnd: ; 256*5 == 0x500
3044dw 0xfaff + (.immEnd - .imm0) ; will cause warning if entries are too big.
3045dw 0x104ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
3046ENDPROC iemAImpl_pshufw
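;
; How the immediate dispatch above works: pshufw needs its shuffle mask encoded
; as an instruction immediate, so the %rep block materialises 256 five-byte
; 'pshufw mm0, mm1, imm8 / ret' stubs and the entry code computes the target as
; .imm0 + bImm * 5 (the 'lea T0, [A3 + A3*4]').  Sketch of the effective call,
; assuming an immediate of 0x1b:
;
;       lea     T1, [.imm0 xWrtRIP]
;       lea     T1, [T1 + 0x1b * 5]
;       call    T1                      ; executes 'pshufw mm0, mm1, 0x1b'
;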
3047
3048
3049%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
3050BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
3051 PROLOGUE_4_ARGS
3052 IEMIMPL_SSE_PROLOGUE
3053
3054 movdqu xmm0, [A1]
3055 movdqu xmm1, [A2]
3056 lea T1, [.imm0 xWrtRIP]
3057 lea T0, [A3 + A3*2] ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
3058 lea T1, [T1 + T0*2]
3059 call T1
3060 movdqu [A1], xmm0
3061
3062 IEMIMPL_SSE_EPILOGUE
3063 EPILOGUE_4_ARGS
3064 %assign bImm 0
3065 %rep 256
3066.imm %+ bImm:
3067 %1 xmm0, xmm1, bImm
3068 ret
3069 %assign bImm bImm + 1
3070 %endrep
3071.immEnd: ; 256*6 == 0x600
3072dw 0xf9ff + (.immEnd - .imm0) ; will cause warning if entries are too big.
3073dw 0x105ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
3074ENDPROC iemAImpl_ %+ %1
3075%endmacro
3076
3077IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
3078IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
3079IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
3080
3081
3082;
3083; Move byte mask.
3084;
3085
3086BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
3087 PROLOGUE_3_ARGS
3088 IEMIMPL_MMX_PROLOGUE
3089
3090 mov T0, [A1]
3091 movq mm1, [A2]
3092 pmovmskb T0, mm1
3093 mov [A1], T0
3094%ifdef RT_ARCH_X86
3095 mov dword [A1 + 4], 0
3096%endif
3097 IEMIMPL_MMX_EPILOGUE
3098 EPILOGUE_3_ARGS
3099ENDPROC iemAImpl_pmovmskb_u64
3100
3101BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
3102 PROLOGUE_3_ARGS
3103 IEMIMPL_SSE_PROLOGUE
3104
3105 mov T0, [A1]
3106 movdqu xmm1, [A2]
3107 pmovmskb T0, xmm1
3108 mov [A1], T0
3109%ifdef RT_ARCH_X86
3110 mov dword [A1 + 4], 0
3111%endif
3112 IEMIMPL_SSE_EPILOGUE
3113 EPILOGUE_3_ARGS
3114ENDPROC iemAImpl_pmovmskb_u128
3115