VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 93539

Last change on this file since 93539 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 82.8 KB
Line 
1; $Id: IEMAllAImpl.asm 93115 2022-01-01 11:31:46Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20; Header Files ;
21;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28; Defined Constants And Macros ;
29;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30
;;
; RET XX / RET wrapper for fastcall.
;
; Emits 'ret %1' only when both RT_ARCH_X86 and RT_OS_WINDOWS are defined;
; every other configuration gets a plain 'ret' and the parameter is ignored.
;
; @param 1 The byte count to hand to 'ret' on 32-bit Windows.
;
%macro RET_FASTCALL 1
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
        ret     %1
 %else
        ret
 %endif
%else
        ret
%endif
%endmacro
45
;;
; NAME for fastcall functions.
;
; By default this is just NAME(a_Name).  When both RT_ARCH_X86 and
; RT_OS_WINDOWS are defined it is redefined to paste together
; a_Prefix, a_Name, '@' and a_cbArgs.
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
; escaping (or whatever the dollar is good for here). Thus the ugly
; prefix argument.
;
; @param a_Name     The function name (C).
; @param a_cbArgs   The argument size; only used by the 32-bit Windows form.
; @param a_Prefix   The prefix token(s); only used by the 32-bit Windows form.
;
%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %undef NAME_FASTCALL
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %endif
%endif
60
;;
; BEGINPROC for fastcall functions.
;
; Emits the export/global directives appropriate for the output format and
; then the function label itself.  The directives use the '$@' prefix while
; the label uses a plain '@' (see the @todo on NAME_FASTCALL).
;
; @param 1 The function name (C).
; @param 2 The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
  export %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
   export NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
  global NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
81
82
;
; We employ some macro assembly here to hide the calling convention differences.
;
; PROLOGUE_n_ARGS / EPILOGUE_n_ARGS[_EX] bracket a function taking n arguments.
; A0..A3 name the argument registers (with _32/_16/_8 sub-register aliases) and
; T0/T1 name the two temporary registers.
;
%ifdef RT_ARCH_AMD64
 ; AMD64: the prologues are empty and every epilogue is a plain 'ret'; the
 ; byte count given to the _EX epilogues is ignored.
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts an optional byte count, so that an invocation written for the
 ; 32-bit variant (which takes one parameter) also assembles here.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro

 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

%else
 ; x86
 ; A0 and A1 are ecx and edx; A2 (ebx) and A3 (esi) are loaded from the stack
 ; by the 3 and 4 argument prologues.  T1 is edi, which every prologue saves
 ; and every epilogue restores.
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx
        mov     ebx, [esp + 4 + 4]      ; A2: skip the saved ebx and the return address.
        push    edi
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 12 + 4 + 0] ; A2: skip 3 saved registers and the return address.
        mov     esi, [esp + 12 + 4 + 4] ; A3: the next stack slot.
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro

 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 ; Note: there is no T1_8 here, edi has no byte sub-register in 32-bit mode.
 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
%endif
266
267
;;
; Load the relevant guest flags from [%1] into the host EFLAGS.
;
; The '%if (%3) != 0' guard is commented out, so the load is performed
; unconditionally, i.e. also when there are no undefined flags.
;
; @remarks Clobbers T0, stack. Changes EFLAGS.
; @param 1 The parameter (A0..A3) pointing to the eflags.
; @param 2 The set of modified flags.
; @param 3 The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
        ;%if (%3) != 0
        pushf                           ; store current flags
        mov     T0_32, [%1]             ; load the guest flags
        and     dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
        and     T0_32, (%2 | %3)        ; select the modified and undefined flags.
        or      [xSP], T0               ; merge guest flags with host flags.
        popf                            ; load the mixed flags.
        ;%endif
%endmacro
287
;;
; Update the guest flags at [%1] from the current host EFLAGS.
;
; Only the bits in (%2 | %3) are taken from the host; all other bits of the
; stored value are preserved.  Expands to nothing when (%2 | %3) is zero.
;
; @remarks Clobbers T0, T1, stack.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf
        pop     T1                      ; T1 = host flags after the instruction.
        mov     T0_32, [%1]             ; flags
        and     T0_32, ~(%2 | %3)       ; clear the modified & undefined flags.
        and     T1_32, (%2 | %3)        ; select the modified and undefined flags.
        or      T0_32, T1_32            ; combine the flags.
        mov     [%1], T0_32             ; save the flags.
 %endif
%endmacro
307
308
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding.
;
; All the functions take a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; The generated functions are named iemAImpl_<mnemonic>_u<bits>[_locked].
;
; @param 1 The instruction mnemonic.
; @param 2 Non-zero if there should be a locked version.
; @param 3 The modified flags.
; @param 4 The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
        ; The 64-bit variant is only generated for AMD64 hosts.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

        ; Same bodies as above, with a lock prefix on the instruction.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro
397
; Instantiate the binary operators.
; Arguments: instr, lock (non-zero = also emit _locked variants),
;            modified-flags, undefined-flags.
; cmp and test are instantiated without locked variants.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
408
409
;;
; Macro for implementing a bit operator.
;
; This will generate code for the 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding.
;
; All the functions take a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; Unlike IEMIMPL_BIN_OP there is no 8-bit variant.
;
; @param 1 The instruction mnemonic.
; @param 2 Non-zero if there should be a locked version.
; @param 3 The modified flags.
; @param 4 The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
        ; The 64-bit variant is only generated for AMD64 hosts.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

        ; Same bodies as above, with a lock prefix on the instruction.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro
; Instantiate the bit test operators: instr, lock, modified-flags, undefined-flags.
; bt is instantiated without locked variants.
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
486
;;
; Macro for implementing a bit search operator.
;
; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
; system where the 64-bit accesses requires hand coding.
;
; All the functions take a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; The result is produced in T0 and only written to [A0] when the instruction
; leaves ZF clear; with ZF set the destination is left untouched.
;
; @note This shares its name with the four parameter IEMIMPL_BIT_OP above and
;       is only told apart by the parameter count (3 here).
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; ZF set: skip the store.
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; ZF set: skip the store.
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
        ; The 64-bit variant is only generated for AMD64 hosts.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; ZF set: skip the store.
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro
; Instantiate the bit search operators: instr, modified-flags, undefined-flags.
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
539
540
541;
542; IMUL is also a similar but yet different case (no lock, no mem dst).
543; The rDX:rAX variant of imul is handled together with mul further down.
544;
545BEGINCODE
;;
; Two operand IMUL, 16-bit: [A0] = A1 * [A0].
;
; @param A0 Pointer to the destination, which is also the first factor.
; @param A1 The second factor (register operand).
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]
        mov     [A0], A1_16             ; mov leaves the flags alone for the save below.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16
554
;;
; Two operand IMUL, 32-bit: [A0] = A1 * [A0].
;
; @param A0 Pointer to the destination, which is also the first factor.
; @param A1 The second factor (register operand).
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]
        mov     [A0], A1_32             ; mov leaves the flags alone for the save below.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32
563
%ifdef RT_ARCH_AMD64
;;
; Two operand IMUL, 64-bit: [A0] = A1 * [A0].  AMD64 hosts only.
;
; @param A0 Pointer to the destination, which is also the first factor.
; @param A1 The second factor (register operand).
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]
        mov     [A0], A1                ; mov leaves the flags alone for the save below.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64
%endif ; RT_ARCH_AMD64
574
575
576;
577; XCHG for memory operands. This implies locking. No flag changes.
578;
579; Each function takes two arguments, first the pointer to the memory,
580; then the pointer to the register. They all return void.
581;
582BEGINCODE
;;
; Exchanges the byte at [A0] (memory) with the byte at [A1] (register value).
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u8_locked, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]
        xchg    [A0], T0_8              ; no lock prefix needed, xchg with memory implies locking.
        mov     [A1], T0_8
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8_locked
590
;;
; Exchanges the word at [A0] (memory) with the word at [A1] (register value).
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u16_locked, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        xchg    [A0], T0_16             ; no lock prefix needed, xchg with memory implies locking.
        mov     [A1], T0_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16_locked
598
;;
; Exchanges the dword at [A0] (memory) with the dword at [A1] (register value).
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u32_locked, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        xchg    [A0], T0_32             ; no lock prefix needed, xchg with memory implies locking.
        mov     [A1], T0_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32_locked
606
%ifdef RT_ARCH_AMD64
;;
; Exchanges the qword at [A0] (memory) with the qword at [A1] (register value).
; AMD64 hosts only.
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u64_locked, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        xchg    [A0], T0                ; no lock prefix needed, xchg with memory implies locking.
        mov     [A1], T0
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64_locked
%endif
616
617; Unlocked variants for fDisregardLock mode.
618
;;
; Exchanges the byte at [A0] with the byte at [A1] using plain loads and
; stores, i.e. without the implicit locking of a memory xchg.
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u8_unlocked, 8
        PROLOGUE_2_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0_8, [A1]
        mov     T1_8, [A0]
        mov     [A0], T0_8
        mov     [A1], T1_8
%else
        ; T1 is edi on x86, which has no byte sub-register (so there is no
        ; T1_8).  Use the two byte halves of ax as temporaries instead.
        mov     al, [A1]
        mov     ah, [A0]
        mov     [A0], al
        mov     [A1], ah
%endif
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8_unlocked
627
;;
; Exchanges the word at [A0] with the word at [A1] using plain loads and
; stores, i.e. without the implicit locking of a memory xchg.
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u16_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]             ; both values are loaded first...
        mov     T1_16, [A0]
        mov     [A0], T0_16             ; ...and then stored crosswise.
        mov     [A1], T1_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16_unlocked
636
;;
; Exchanges the dword at [A0] with the dword at [A1] using plain loads and
; stores, i.e. without the implicit locking of a memory xchg.
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u32_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]             ; both values are loaded first...
        mov     T1_32, [A0]
        mov     [A0], T0_32             ; ...and then stored crosswise.
        mov     [A1], T1_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32_unlocked
645
%ifdef RT_ARCH_AMD64
;;
; Exchanges the qword at [A0] with the qword at [A1] using plain loads and
; stores, i.e. without the implicit locking of a memory xchg.  AMD64 hosts only.
;
; @param A0 Pointer to the memory operand.
; @param A1 Pointer to the register operand.
;
BEGINPROC_FASTCALL iemAImpl_xchg_u64_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]                ; both values are loaded first...
        mov     T1, [A0]
        mov     [A0], T0                ; ...and then stored crosswise.
        mov     [A1], T1
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64_unlocked
%endif
656
657
658;
659; XADD for memory operands.
660;
661; Each function takes three arguments, first the pointer to the
662; memory/register, then the pointer to the register, and finally a pointer to
663; eflags. They all return void.
664;
665BEGINCODE
;;
; XADD, 8-bit: executes xadd on [A0] with the byte loaded from [A1], then
; writes the register result back to [A1].
;
; @param A0 Pointer to the memory/register destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]              ; loaded after the flag macro, which clobbers T0.
        xadd    [A0], T0_8
        mov     [A1], T0_8              ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8
675
;;
; XADD, 16-bit: executes xadd on [A0] with the word loaded from [A1], then
; writes the register result back to [A1].
;
; @param A0 Pointer to the memory/register destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]             ; loaded after the flag macro, which clobbers T0.
        xadd    [A0], T0_16
        mov     [A1], T0_16             ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16
685
;;
; XADD, 32-bit: executes xadd on [A0] with the dword loaded from [A1], then
; writes the register result back to [A1].
;
; @param A0 Pointer to the memory/register destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]             ; loaded after the flag macro, which clobbers T0.
        xadd    [A0], T0_32
        mov     [A1], T0_32             ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32
695
%ifdef RT_ARCH_AMD64
;;
; XADD, 64-bit: executes xadd on [A0] with the qword loaded from [A1], then
; writes the register result back to [A1].  AMD64 hosts only.
;
; @param A0 Pointer to the memory/register destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                ; loaded after the flag macro, which clobbers T0.
        xadd    [A0], T0
        mov     [A1], T0                ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64
%endif ; RT_ARCH_AMD64
707
;;
; Locked XADD, 8-bit: same as iemAImpl_xadd_u8 but with a lock prefix.
;
; @param A0 Pointer to the memory destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]              ; loaded after the flag macro, which clobbers T0.
        lock xadd [A0], T0_8
        mov     [A1], T0_8              ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked
717
;;
; Locked XADD, 16-bit: same as iemAImpl_xadd_u16 but with a lock prefix.
;
; @param A0 Pointer to the memory destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]             ; loaded after the flag macro, which clobbers T0.
        lock xadd [A0], T0_16
        mov     [A1], T0_16             ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked
727
;;
; Locked XADD, 32-bit: same as iemAImpl_xadd_u32 but with a lock prefix.
;
; @param A0 Pointer to the memory destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]             ; loaded after the flag macro, which clobbers T0.
        lock xadd [A0], T0_32
        mov     [A1], T0_32             ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked
737
%ifdef RT_ARCH_AMD64
;;
; Locked XADD, 64-bit: same as iemAImpl_xadd_u64 but with a lock prefix.
; AMD64 hosts only.
;
; @param A0 Pointer to the memory destination.
; @param A1 Pointer to the register operand.
; @param A2 Pointer to the eflags.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                ; loaded after the flag macro, which clobbers T0.
        lock xadd [A0], T0
        mov     [A1], T0                ; stored before the flag macro, which clobbers T0.
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64_locked
%endif ; RT_ARCH_AMD64
749
750
;
; CMPXCHG8B.
;
; These are tricky register wise, so the code is duplicated for each calling
; convention.
;
; The instruction hard-wires edx:eax (compare value / result) and ecx:ebx
; (replacement value), so the incoming argument registers that collide with
; those are moved out of the way first.  The edx:eax pair is always written
; back to *pu64EaxEdx afterwards.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
;                                             uint32_t *pEFlags));
;
; Note! Identical to iemAImpl_cmpxchg16b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu64Dst, rdx=pu64EaxEdx, r8=pu64EbxEcx, r9=pEFlags.
        push    rbx                     ; rbx is loaded with the replacement low dword below.

        mov     r11, rdx                ; pu64EaxEdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     ebx, [r8]
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        mov     [r11], eax              ; must precede IEM_SAVE_FLAGS, which clobbers eax and r11.
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
        push    rbx                     ; rbx is loaded with the replacement low dword below.

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64EbxEcx (is also T1)

        mov     ebx, [r11]
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax              ; must precede IEM_SAVE_FLAGS, which clobbers eax.
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ; In: ecx=pu64Dst, edx=pu64EaxEdx, stack: pu64EbxEcx, pEFlags.
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64EaxEdx
        mov     ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax              ; must precede IEM_SAVE_FLAGS, which clobbers eax.
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8                       ; pops the two stack arguments.
%endif
ENDPROC iemAImpl_cmpxchg8b
840
;;
; Locked CMPXCHG8B.  Just a jump to iemAImpl_cmpxchg8b, which always uses the
; lock prefix.  Same arguments as iemAImpl_cmpxchg8b.
;
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg8b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
845
846%ifdef RT_ARCH_AMD64
847
;
; CMPXCHG16B.
;
; These are tricky register wise, so the code is duplicated for each calling
; convention.
;
; The instruction hard-wires rdx:rax (compare value / result) and rcx:rbx
; (replacement value), so the incoming argument registers that collide with
; those are moved out of the way first.  The full 64-bit rax and rdx are
; always written back to *pu1284RaxRdx afterwards.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu1284RaxRdx, PRTUINT128U pu128RbxRcx,
;                                              uint32_t *pEFlags));
;
; Note! Identical to iemAImpl_cmpxchg8b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu128Dst, rdx=pu1284RaxRdx, r8=pu128RbxRcx, r9=pEFlags.
        push    rbx                     ; rbx is loaded with the replacement low qword below.

        mov     r11, rdx                ; pu64RaxRdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     rbx, [r8]
        mov     rcx, [r8 + 8]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [r11]
        mov     rdx, [r11 + 8]

        lock cmpxchg16b [r10]

        mov     [r11], rax              ; must precede IEM_SAVE_FLAGS, which clobbers rax and r11.
        mov     [r11 + 8], rdx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi=pu128Dst, rsi=pu1284RaxRdx, rdx=pu128RbxRcx, rcx=pEFlags.
        push    rbx                     ; rbx is loaded with the replacement low qword below.

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64RbxRcx (is also T1)

        mov     rbx, [r11]
        mov     rcx, [r11 + 8]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [rsi]
        mov     rdx, [rsi + 8]

        lock cmpxchg16b [rdi]

        ; Write back both 64-bit halves in full (rax/rdx, not eax/edx), just
        ; like the MSC path above; must precede IEM_SAVE_FLAGS (clobbers rax).
        mov     [rsi], rax
        mov     [rsi + 8], rdx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
ENDPROC iemAImpl_cmpxchg16b
907
;;
; Locked CMPXCHG16B.  Just a jump to iemAImpl_cmpxchg16b, which always uses
; the lock prefix.  Same arguments as iemAImpl_cmpxchg16b.
;
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg16b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
ENDPROC iemAImpl_cmpxchg16b_locked
912
913%endif ; RT_ARCH_AMD64
914
915
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; The accumulator is loaded from [A1] before the instruction and written back
; to [A1] afterwards.  On 32-bit hosts the 64-bit variant gets a pointer to
; the register value rather than the value itself and is implemented using
; cmpxchg8b.
;
; @param 1 The instruction prefix: empty or 'lock'.
; @param 2 The function name suffix: empty or '_locked'.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ; Note that the lock prefix is always used here, regardless of %1.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF clear means the compare failed and edx:eax now holds the memory
        ; value, while [esi] still holds the old accumulator to compare with.
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                ; equal: just set the other flags.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8                       ; pops the two stack arguments.

.cmpxchg8b_not_equal:
        cmp     [esi + 4], edx  ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store                  ; high dwords differ: they decide the flags.
        cmp     [esi], eax              ; otherwise the low dwords decide.
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG
1010
; Instantiate the plain (empty prefix, empty suffix) and the locked variants.
IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
1013
;;
; Macro for implementing a unary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding.
;
; All the functions take a pointer to the destination memory operand in A0
; and a pointer to eflags in A1.  The locked variants are always generated.
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
        ; The 64-bit variants are only generated for AMD64 hosts.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif ; RT_ARCH_AMD64

%endmacro
1097
; Instantiate the unary operators: instr, modified-flags, undefined-flags.
; inc and dec leave CF out of the modified set; not has no flags to save,
; which makes IEM_SAVE_FLAGS expand to nothing.
IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1102
1103
;;
; Macro for implementing memory fence operation.
;
; No return value, no operands or anything.  The generated function is named
; iemAImpl_<instruction> and consists of the instruction followed by 'ret'.
;
; @param 1 The instruction.
;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro
1118
1119IEMIMPL_MEM_FENCE lfence
1120IEMIMPL_MEM_FENCE sfence
1121IEMIMPL_MEM_FENCE mfence
1122
;;
; Fence substitute for hosts without SSE2.
;
; A copy of xAX is pushed and then exchanged with the stack slot it was just
; written to, so the register value is unchanged.  XCHG with a memory operand
; is implicitly locked, which is what provides the barrier.  The scratch slot
; is dropped again before returning.
;
; Takes no arguments and returns nothing.  EFLAGS are modified by the stack
; pointer adjustment.
;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        push    xAX                     ; scratch slot holding a copy of xAX
        xchg    xAX, [xSP]              ; exchanges equal values; implicit lock
        add     xSP, xCB                ; discard the scratch slot
        ret
ENDPROC iemAImpl_alt_mem_fence
1132
1133
1134
;;
; Macro for implementing a shift or rotate operation.
;
; Generates iemAImpl_<mnemonic>_u8, _u16 and _u32, plus _u64 when
; RT_ARCH_AMD64 is defined (on 32-bit hosts the 64-bit variant has to be
; hand coded elsewhere).
;
; Every function takes a pointer to the destination memory operand in A0,
; the shift count in A1 and a pointer to eflags in A2.
;
; The instruction needs the count in CL.  With ASM_CALL64_GCC it is copied
; there from A1_8.  Otherwise A0 and A1 are exchanged, which relies on A0
; being xCX in those calling conventions; the destination pointer is then
; found in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8                ; count -> cl
        %1      byte [A0], cl
 %else
        xchg    A1, A0                  ; count -> cl, destination -> A1
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8                ; count -> cl
        %1      word [A0], cl
 %else
        xchg    A1, A0                  ; count -> cl, destination -> A1
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8                ; count -> cl
        %1      dword [A0], cl
 %else
        xchg    A1, A0                  ; count -> cl, destination -> A1
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8                ; count -> cl
        %1      qword [A0], cl
  %else
        xchg    A1, A0                  ; count -> cl, destination -> A1
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1219
1220
;;
; Macro for implementing a double precision shift operation.
;
; Generates iemAImpl_<mnemonic>_u16 and _u32, plus _u64 when RT_ARCH_AMD64 is
; defined (on 32-bit hosts the 64-bit variant has to be hand coded elsewhere).
;
; The functions take a pointer to the destination operand (r/m) in A0, the
; source (reg) in A1, the shift count in A2 and a pointer to the eflags
; variable in A3.
;
; The instruction needs the count in CL:
;   - ASM_CALL64_GCC: A2 and A3 are exchanged around the instruction, which
;     relies on A3 being xCX; the second exchange puts the eflags pointer back
;     into A3 for IEM_SAVE_FLAGS.
;   - otherwise: A0 and A2 are exchanged, which relies on A0 being xCX; the
;     destination pointer is then found in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; eflags pointer back in A3
 %else
        xchg    A0, A2                  ; count -> cl, destination -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_32, cl
        xchg    A3, A2                  ; eflags pointer back in A3
 %else
        xchg    A0, A2                  ; count -> cl, destination -> A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1, cl
        xchg    A3, A2                  ; eflags pointer back in A3
  %else
        xchg    A0, A2                  ; count -> cl, destination -> A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1289
1290
;;
; Macro for implementing a multiplication operation.
;
; Generates iemAImpl_<mnemonic>_u8, _u16 and _u32, plus _u64 when
; RT_ARCH_AMD64 is defined (the 32-bit host version of the latter lives in
; IEMAllAImplC.cpp).
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 is a
; pointer to AX, A1 the operand and A2 a pointer to eflags.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the operand
; in A2 and a pointer to eflags in A3.
;
; All functions return 0, so callers can treat them the same way as the
; div/idiv workers.
;
; Outside ASM_CALL64_GCC the rDX pointer is copied from A1 into T1 before the
; multiplication, which overwrites xDX (A1 presumably lives in xDX there).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the 16-bit product goes back to AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer across the multiply
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; keep the rDX pointer across the multiply
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; keep the rDX pointer across the multiply
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1386
1387
BEGINCODE
;;
; Worker function for negating the double-width number held in T1_32:T0_32
; (T1_32 = high half, T0_32 = low half).
;
; Two zeroed stack slots are swapped with the register pair, leaving zero in
; the registers and the original value on the stack.  The value is then
; subtracted from zero with borrow propagation into the high half.
;
; @uses         None besides T0 and T1, which carry the value.  EFLAGS are
;               modified.
;
BEGINPROC iemAImpl_negate_T0_T1_u32
        push    0
        push    0
        xchg    T0_32, [xSP]            ; T0 = 0, slot = low half
        xchg    T1_32, [xSP + xCB]      ; T1 = 0, slot = high half
        sub     T0_32, [xSP]            ; 0 - low
        sbb     T1_32, [xSP + xCB]      ; 0 - high - borrow
        add     xSP, xCB*2              ; drop both slots
        ret
ENDPROC iemAImpl_negate_T0_T1_u32
1402
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating the double-width number held in T1:T0
; (T1 = high half, T0 = low half).
;
; Same approach as the 32-bit worker: the register pair is swapped with two
; zeroed stack slots and the original value is subtracted from zero.
;
; @uses         None besides T0 and T1, which carry the value.  EFLAGS are
;               modified.
;
BEGINPROC iemAImpl_negate_T0_T1_u64
        push    0
        push    0
        xchg    T0, [xSP]               ; T0 = 0, slot = low half
        xchg    T1, [xSP + xCB]         ; T1 = 0, slot = high half
        sub     T0, [xSP]               ; 0 - low
        sbb     T1, [xSP + xCB]         ; 0 - high - borrow
        add     xSP, xCB*2              ; drop both slots
        ret
ENDPROC iemAImpl_negate_T0_T1_u64
%endif
1418
1419
1420;;
1421; Macro for implementing a division operations.
1422;
1423; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1424; 32-bit system where the 64-bit accesses requires hand coding.
1425;
1426; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1427; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1428; pointer to eflags in A3.
1429;
1430; The functions all return 0 on success and -1 if a divide error should be
1431; raised by the caller.
1432;
1433; @param 1 The instruction mnemonic.
1434; @param 2 The modified flags.
1435; @param 3 The undefined flags.
1436; @param 4 1 if signed, 0 if unsigned.
1437;
1438; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1439;
1440%macro IEMIMPL_DIV_OP 4
1441BEGINCODE
;
; 8-bit variant: A0 = pointer to AX (dividend in, AL/AH result out),
; A1 = divisor, A2 = pointer to eflags.  Returns 0 on success and -1 when the
; divisor is zero or the overflow pre-check fails.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; high byte of the dividend vs. divisor
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8                    ; work on the magnitude of the divisor
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8
1502
;
; 16-bit variant: A0 = pointer to AX, A1 = pointer to DX, A2 = divisor,
; A3 = pointer to eflags.  Returns 0 on success and -1 when the divisor is
; zero or the overflow pre-check fails.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; high word of the dividend vs. divisor
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend (DX:AX in one register)
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16                   ; work on the magnitude of the divisor
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor copied to T1 before dx is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; DX pointer copied to T1 before dx is loaded
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16
1576
;
; 32-bit variant: A0 = pointer to EAX, A1 = pointer to EDX, A2 = divisor,
; A3 = pointer to eflags.  Returns 0 on success and -1 when the divisor is
; zero or the overflow pre-check fails.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; high dword of the dividend vs. divisor
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u32)
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32                   ; work on the magnitude of the divisor
        test    T1_32, T1_32
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u32)
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2                      ; original (signed) divisor back
 %endif

        ; Note: eax is loaded inside each branch below; a separate load ahead
        ;       of the %ifdef would just be repeated.
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor copied to T1 before edx is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; EDX pointer copied to T1 before edx is loaded
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done by the signed check
 %endif
.div_zero:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32
1658
1659 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1660BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1661 PROLOGUE_4_ARGS
1662
1663 test A2, A2
1664 jz .div_zero
1665 %if %4 == 0
1666 cmp [A1], A2
1667 jae .div_overflow
1668 %else
1669 push A2 ; save A2 so we modify it (we out of regs on x86).
1670 mov T0, [A0] ; T0 = dividend low
1671 mov T1, [A1] ; T1 = dividend high
1672 test A2, A2
1673 js .divisor_negative
1674 test T1, T1
1675 jns .both_positive
1676 call NAME(iemAImpl_negate_T0_T1_u64)
1677.one_of_each: ; OK range is 2^(result-with - 1) + (divisor - 1).
1678 push T0 ; Start off like unsigned below.
1679 shl T1, 1
1680 shr T0, 63
1681 or T1, T0
1682 cmp T1, A2
1683 pop T0
1684 jb .div_no_overflow
1685 ja .div_overflow
1686 mov T1, 0x7fffffffffffffff
1687 and T0, T1 ; Special case for covering (divisor - 1).
1688 cmp T0, A2
1689 jae .div_overflow
1690 jmp .div_no_overflow
1691
1692.divisor_negative:
1693 neg A2
1694 test T1, T1
1695 jns .one_of_each
1696 call NAME(iemAImpl_negate_T0_T1_u64)
1697.both_positive: ; Same as unsigned shifted by sign indicator bit.
1698 shl T1, 1
1699 shr T0, 63
1700 or T1, T0
1701 cmp T1, A2
1702 jae .div_overflow
1703.div_no_overflow:
1704 pop A2
1705 %endif
1706
1707 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1708 mov rax, [A0]
1709 %ifdef ASM_CALL64_GCC
1710 mov T1, A2
1711 mov rax, [A0]
1712 mov rdx, [A1]
1713 %1 T1
1714 mov [A0], rax
1715 mov [A1], rdx
1716 %else
1717 mov T1, A1
1718 mov rax, [A0]
1719 mov rdx, [T1]
1720 %1 A2
1721 mov [A0], rax
1722 mov [T1], rdx
1723 %endif
1724 IEM_SAVE_FLAGS A3, %2, %3
1725 xor eax, eax
1726
1727.return:
1728 EPILOGUE_4_ARGS_EX 12
1729
1730.div_overflow:
1731 %if %4 != 0
1732 pop A2
1733 %endif
1734.div_zero:
1735 mov eax, -1
1736 jmp .return
1737ENDPROC iemAImpl_ %+ %1 %+ _u64
1738 %endif ; !RT_ARCH_AMD64
1739
1740%endmacro
1741
; Arguments: mnemonic, modified flags, undefined flags, signed indicator.
IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1744
1745
;
; BSWAP. No flag changes.
;
; Each function takes one argument, a pointer to the value to bswap
; (input/output). They all return void.
;

;
; 16-bit operand size form: a 66h prefix byte is emitted by hand in front of
; the 32-bit BSWAP encoding.  A full dword is read and written back.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; just in case any of the upper bits are used.
        db      66h                     ; operand size prefix for the next instruction
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1760
;
; Byte swaps the dword A0 points to, in place.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; fetch
        bswap   T0_32                   ; reverse the four bytes
        mov     [A0], T0_32             ; store back
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1768
;
; Byte swaps the qword A0 points to, in place.  Without RT_ARCH_AMD64 the
; value is handled as two dwords: each half is swapped on its own and the two
; halves trade places on the way back.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        mov     T0, [A0]                ; low dword
        mov     T1, [A0 + 4]            ; high dword
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; swapped low becomes the new high
        mov     [A0], T1                ; swapped high becomes the new low
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1787
1788
;;
; Initialize the FPU for the actual instruction being emulated, this means
; loading parts of the guest's control word and status word.
;
; The current FPU environment is stored at [xSP], its FCW and FSW fields are
; patched, and the result is loaded back:
;   - FCW is taken from the guest, limited to the exception mask, precision
;     control and rounding control bits.
;   - FSW keeps the current TOP field and takes the condition code bits from
;     the guest; everything else in FSW ends up zero.
;
; @uses         The environment image at [xSP] (the original note says 24
;               bytes of stack; NOTE(review): confirm against the size of
;               X86FSTENV32P).  T0 and T1 are clobbered.
; @param        1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK              ; guest condition codes
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK            ; current top-of-stack
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1814
1815
;;
; Result of an FPU worker producing one 80-bit value plus a status word.
; Presumably mirrors a C-side structure of the same name; keep in sync.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result  resw 5                  ; 80-bit value (10 bytes)
    .FSW        resw 1                  ; output FPU status word
endstruc
1823
1824
;;
; Result of an FPU worker producing two 80-bit values plus a status word.
; Presumably mirrors a C-side structure of the same name; keep in sync.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; first 80-bit value (10 bytes)
    .FSW        resw 1                  ; output FPU status word
    .r80Result2 resw 5                  ; second 80-bit value (10 bytes)
endstruc
1833
1834
1835;
1836;---------------------- 16-bit signed integer operations ----------------------
1837;
1838
1839
;;
; Converts a 16-bit signed integer value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1863
1864
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
;
; The popping form of the store is used; only the stored integer and the
; resulting FSW are handed back.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1888
1889
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        ; The destination is a 16-bit integer, so the store must be word
        ; sized.  A dword store would write two bytes past the destination
        ; and apply the 32-bit range when deciding on invalid-operation.
        fisttp  word [A2]

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1914
1915
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value.
;
; Generates iemAImpl_<instruction>_r80_by_i16.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A2]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1952
1953
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value,
; only returning FSW.
;
; Generates iemAImpl_<instruction>_r80_by_i16.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A2]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1984
1985
1986
1987;
1988;---------------------- 32-bit signed integer operations ----------------------
1989;
1990
1991
;;
; Converts a 32-bit signed integer value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
2015
2016
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
;
; The popping form of the store is used; only the stored integer and the
; resulting FSW are handed back.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
2040
2041
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
2066
2067
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value.
;
; Generates iemAImpl_<instruction>_r80_by_i32.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A2]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2104
2105
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value,
; only returning FSW.
;
; Generates iemAImpl_<instruction>_r80_by_i32.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A2]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2136
2137
2138
2139;
2140;---------------------- 64-bit signed integer operations ----------------------
2141;
2142
2143
;;
; Converts a 64-bit signed integer value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2167
2168
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
;
; The popping form of the store is used; only the stored integer and the
; resulting FSW are handed back.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2192
2193
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2218
2219
2220
2221;
2222;---------------------- 32-bit floating point operations ----------------------
2223;
2224
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2248
2249
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; non-popping store; final fninit cleans up

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2273
2274
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value.
;
; Generates iemAImpl_<instruction>_r80_by_r32.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A2]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2311
2312
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value,
; only returning FSW.
;
; Generates iemAImpl_<instruction>_r80_by_r32.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A2]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2343
2344
2345
2346;
2347;---------------------- 64-bit floating point operations ----------------------
2348;
2349
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        ; Start from a freshly initialised FPU like the other load workers do.
        ; Without this the environment patched below (tag word, TOP) is
        ; whatever the host happened to leave behind, and the load can hit a
        ; stack overflow on a non-empty register stack.
        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear exception flags before the store
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2372
2373
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; start from a freshly initialised FPU
        fld     tword [A3]              ; value is loaded before guest FCW/FSW go in
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; non-popping store; final fninit cleans up

        fnstsw  word [A1]

        fninit                          ; leave a clean FPU behind
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2397
2398
2399;;
2400; FPU instruction working on one 80-bit and one 64-bit floating point value.
2401;
2402; Emits one worker named iemAImpl_ + instruction + _r80_by_r64 per invocation.
;
; @param 1 The instruction
2403;
2404; @param A0 FPU context (fxsave).
2405; @param A1 Pointer to a IEMFPURESULT for the output.
2406; @param A2 Pointer to the 80-bit value.
2407; @param A3 Pointer to the 64-bit value.
2408;
2409%macro IEMIMPL_FPU_R80_BY_R64 1
2410BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2411 PROLOGUE_4_ARGS
2412 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2413
2414 fninit ; Reset the FPU so the register stack is empty.
2415 fld tword [A2] ; ST0 = the 80-bit operand.
2416 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2417 %1 qword [A3] ; Apply the instruction to ST0 and the 64-bit memory operand.
2418
2419 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word before exceptions are cleared.
2420 fnclex ; Clear pending exceptions before the store below.
2421 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store the 80-bit result and pop.
2422
2423 fninit ; Leave the FPU in its reset state.
2424 add xSP, 20h
2425 EPILOGUE_4_ARGS
2426ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2427%endmacro
2428
2429IEMIMPL_FPU_R80_BY_R64 fadd
2430IEMIMPL_FPU_R80_BY_R64 fmul
2431IEMIMPL_FPU_R80_BY_R64 fsub
2432IEMIMPL_FPU_R80_BY_R64 fsubr
2433IEMIMPL_FPU_R80_BY_R64 fdiv
2434IEMIMPL_FPU_R80_BY_R64 fdivr
2435
2436;;
2437; FPU instruction working on one 80-bit and one 64-bit floating point value,
2438; only returning FSW.
2439;
2440; @param 1 The instruction
2441;
2442; @param A0 FPU context (fxsave).
2443; @param A1 Where to store the output FSW.
2444; @param A2 Pointer to the 80-bit value.
2445; @param A3 Pointer to the 64-bit value.
2446;
2447%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
2448BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2449 PROLOGUE_4_ARGS
2450 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2451
2452 fninit ; Reset the FPU so the register stack is empty.
2453 fld tword [A2] ; ST0 = the 80-bit operand.
2454 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2455 %1 qword [A3] ; Apply the instruction to ST0 and the 64-bit memory operand.
2456
2457 fnstsw word [A1] ; The status word is the only output.
2458
2459 fninit ; Leave the FPU in its reset state.
2460 add xSP, 20h
2461 EPILOGUE_4_ARGS
2462ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2463%endmacro
2464
2465IEMIMPL_FPU_R80_BY_R64_FSW fcom
2466
2467
2468
2469;
2470;---------------------- 80-bit floating point operations ----------------------
2471;
2472
2473;;
2474; Loads an 80-bit floating point register value from memory.
2475;
2476; @param A0 FPU context (fxsave).
2477; @param A1 Pointer to a IEMFPURESULT for the output.
2478; @param A2 Pointer to the 80-bit floating point value to load.
2479;
2480BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2481 PROLOGUE_3_ARGS
2482 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2483
2484 fninit ; Reset the FPU so the register stack is empty.
2485 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2486 fld tword [A2] ; Push the 80-bit value; this is the instruction whose FSW is reported.
2487
2488 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word before exceptions are cleared.
2489 fnclex ; Clear pending exceptions before the store below.
2490 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store the loaded value and pop.
2491
2492 fninit ; Leave the FPU in its reset state.
2493 add xSP, 20h
2494 EPILOGUE_3_ARGS
2495ENDPROC iemAImpl_fld_r80_from_r80
2496
2497
2498;;
2499; Store an 80-bit floating point register to memory.
2500;
2501; @param A0 FPU context (fxsave).
2502; @param A1 Where to return the output FSW.
2503; @param A2 Where to store the 80-bit value.
2504; @param A3 Pointer to the 80-bit register value.
2505;
2506BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2507 PROLOGUE_4_ARGS
2508 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2509
2510 fninit ; Reset the FPU so the register stack is empty.
2511 fld tword [A3] ; ST0 = the register value to store.
2512 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2513 fstp tword [A2] ; Store ST0 as 80-bit and pop.
2514
2515 fnstsw word [A1] ; Return the status word produced by the store.
2516
2517 fninit ; Leave the FPU in its reset state.
2518 add xSP, 20h
2519 EPILOGUE_4_ARGS
2520ENDPROC iemAImpl_fst_r80_to_r80
2521
2522
2523;;
2524; FPU instruction working on two 80-bit floating point values.
2525;
2526; @param 1 The instruction
; @param 2 The operand list to pass to the instruction; empty for instructions
; with implicit operands (see the invocations below).
2527;
2528; @param A0 FPU context (fxsave).
2529; @param A1 Pointer to a IEMFPURESULT for the output.
2530; @param A2 Pointer to the first 80-bit value (ST0)
2531; @param A3 Pointer to the second 80-bit value (STn).
2532;
2533%macro IEMIMPL_FPU_R80_BY_R80 2
2534BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2535 PROLOGUE_4_ARGS
2536 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2537
2538 fninit ; Reset the FPU so the register stack is empty.
2539 fld tword [A3] ; Pushed first, so it ends up in ST1.
2540 fld tword [A2] ; Pushed last, so it is ST0.
2541 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2542 %1 %2
2543
2544 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word before exceptions are cleared.
2545 fnclex ; Clear pending exceptions before the store below.
2546 fstp tword [A1 + IEMFPURESULT.r80Result] ; The result is taken from ST0.
2547
2548 fninit ; Leave the FPU in its reset state.
2549 add xSP, 20h
2550 EPILOGUE_4_ARGS
2551ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2552%endmacro
2553
2554IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2555IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2556IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2557IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2558IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2559IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2560IEMIMPL_FPU_R80_BY_R80 fprem, {}
2561IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2562IEMIMPL_FPU_R80_BY_R80 fscale, {}
2563
2564
2565;;
2566; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2567; storing the result in ST1 and popping the stack.
2568;
2569; After the pop the result is the new ST0, which is what gets stored to the
; IEMFPURESULT.
;
; @param 1 The instruction
2570;
2571; @param A0 FPU context (fxsave).
2572; @param A1 Pointer to a IEMFPURESULT for the output.
2573; @param A2 Pointer to the first 80-bit value (ST1).
2574; @param A3 Pointer to the second 80-bit value (ST0).
2575;
2576%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2577BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2578 PROLOGUE_4_ARGS
2579 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2580
2581 fninit ; Reset the FPU so the register stack is empty.
2582 fld tword [A2] ; Pushed first, so it ends up in ST1.
2583 fld tword [A3] ; Pushed last, so it is ST0.
2584 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2585 %1
2586
2587 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word before exceptions are cleared.
2588 fnclex ; Clear pending exceptions before the store below.
2589 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store the top of stack, i.e. the result after the pop.
2590
2591 fninit ; Leave the FPU in its reset state.
2592 add xSP, 20h
2593 EPILOGUE_4_ARGS
2594ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2595%endmacro
2596
2597IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
2598IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
2599IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2600
2601
2602;;
2603; FPU instruction working on two 80-bit floating point values, only
2604; returning FSW.
2605;
2606; @param 1 The instruction
2607;
2608; @param A0 FPU context (fxsave).
2609; @param A1 Pointer to a uint16_t for the resulting FSW.
2610; @param A2 Pointer to the first 80-bit value.
2611; @param A3 Pointer to the second 80-bit value.
2612;
2613%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2614BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2615 PROLOGUE_4_ARGS
2616 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2617
2618 fninit ; Reset the FPU so the register stack is empty.
2619 fld tword [A3] ; Second value, pushed first so it ends up in ST1.
2620 fld tword [A2] ; First value, pushed last so it is ST0.
2621 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2622 %1 st0, st1
2623
2624 fnstsw word [A1] ; The status word is the only output.
2625
2626 fninit ; Leave the FPU in its reset state.
2627 add xSP, 20h
2628 EPILOGUE_4_ARGS
2629ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2630%endmacro
2631
2632IEMIMPL_FPU_R80_BY_R80_FSW fcom
2633IEMIMPL_FPU_R80_BY_R80_FSW fucom
2634
2635
2636;;
2637; FPU instruction working on two 80-bit floating point values,
2638; returning FSW and EFLAGS (eax).
2639;
2640; @param 1 The instruction
2641;
2642; @returns EFLAGS in EAX.
2643; @param A0 FPU context (fxsave).
2644; @param A1 Pointer to a uint16_t for the resulting FSW.
2645; @param A2 Pointer to the first 80-bit value.
2646; @param A3 Pointer to the second 80-bit value.
2647;
2648%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2649BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2650 PROLOGUE_4_ARGS
2651 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2652
2653 fninit ; Reset the FPU so the register stack is empty.
2654 fld tword [A3] ; Second value, pushed first so it ends up in ST1.
2655 fld tword [A2] ; First value, pushed last so it is ST0.
2656 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2657 %1 st1
2658
2659 fnstsw word [A1] ; Return the status word.
2660 pushf ; Grab the flags right after the compare, before the 'add' below changes them.
2661 pop xAX ; Return value = the flags image.
2662
2663 fninit ; Leave the FPU in its reset state.
2664 add xSP, 20h
2665 EPILOGUE_4_ARGS
2666ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2667%endmacro
2668
2669IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2670IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2671
2672
2673;;
2674; FPU instruction working on one 80-bit floating point value.
2675;
2676; The instruction takes no explicit operands and is applied to the value
; after it has been loaded into ST0.
;
; @param 1 The instruction
2677;
2678; @param A0 FPU context (fxsave).
2679; @param A1 Pointer to a IEMFPURESULT for the output.
2680; @param A2 Pointer to the 80-bit value.
2681;
2682%macro IEMIMPL_FPU_R80 1
2683BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2684 PROLOGUE_3_ARGS
2685 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2686
2687 fninit ; Reset the FPU so the register stack is empty.
2688 fld tword [A2] ; ST0 = the operand.
2689 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2690 %1
2691
2692 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word before exceptions are cleared.
2693 fnclex ; Clear pending exceptions before the store below.
2694 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store ST0 as the result and pop.
2695
2696 fninit ; Leave the FPU in its reset state.
2697 add xSP, 20h
2698 EPILOGUE_3_ARGS
2699ENDPROC iemAImpl_ %+ %1 %+ _r80
2700%endmacro
2701
2702IEMIMPL_FPU_R80 fchs
2703IEMIMPL_FPU_R80 fabs
2704IEMIMPL_FPU_R80 f2xm1
2705IEMIMPL_FPU_R80 fsqrt
2706IEMIMPL_FPU_R80 frndint
2707IEMIMPL_FPU_R80 fsin
2708IEMIMPL_FPU_R80 fcos
2709
2710
2711;;
2712; FPU instruction working on one 80-bit floating point value, only
2713; returning FSW.
2714;
2715; @param 1 The instruction
2716;
2717; @param A0 FPU context (fxsave).
2718; @param A1 Pointer to a uint16_t for the resulting FSW.
2719; @param A2 Pointer to the 80-bit value.
2720;
2721%macro IEMIMPL_FPU_R80_FSW 1
2722BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2723 PROLOGUE_3_ARGS
2724 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2725
2726 fninit ; Reset the FPU so the register stack is empty.
2727 fld tword [A2] ; ST0 = the value to examine.
2728 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2729 %1
2730
2731 fnstsw word [A1] ; The status word is the only output.
2732
2733 fninit ; Leave the FPU in its reset state.
2734 add xSP, 20h
2735 EPILOGUE_3_ARGS
2736ENDPROC iemAImpl_ %+ %1 %+ _r80
2737%endmacro
2738
2739IEMIMPL_FPU_R80_FSW ftst
2740IEMIMPL_FPU_R80_FSW fxam
2741
2742
2743
2744;;
2745; FPU instruction loading an 80-bit floating point constant.
2746;
2747; Emits one worker named iemAImpl_ + instruction per invocation. The
; instruction pushes its constant onto the (empty) FPU stack, and the
; resulting ST0 is stored to the IEMFPURESULT together with the status word.
;
; @param 1 The instruction
2748;
2749; @param A0 FPU context (fxsave).
2750; @param A1 Pointer to a IEMFPURESULT for the output.
2751;
2752%macro IEMIMPL_FPU_R80_CONST 1
2753BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2754 PROLOGUE_2_ARGS
2755 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2756
2757 fninit ; Reset the FPU so the register stack is empty.
2758 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2759 %1
2760
2761 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word before exceptions are cleared.
2762 fnclex ; Clear pending exceptions before the store below.
2763 fstp tword [A1 + IEMFPURESULT.r80Result] ; Store the constant and pop.
2764
2765 fninit ; Leave the FPU in its reset state.
2766 add xSP, 20h
2767 EPILOGUE_2_ARGS
2768ENDPROC iemAImpl_ %+ %1
2769%endmacro
2770
2771IEMIMPL_FPU_R80_CONST fld1
2772IEMIMPL_FPU_R80_CONST fldl2t
2773IEMIMPL_FPU_R80_CONST fldl2e
2774IEMIMPL_FPU_R80_CONST fldpi
2775IEMIMPL_FPU_R80_CONST fldlg2
2776IEMIMPL_FPU_R80_CONST fldln2
2777IEMIMPL_FPU_R80_CONST fldz
2778
2779
2780;;
2781; FPU instruction working on one 80-bit floating point value, outputting two.
2782;
2783; The instruction leaves two values on the FPU stack; the top one (ST0) is
; stored as r80Result2 and the one below it as r80Result1.
;
; @param 1 The instruction
2784;
2785; @param A0 FPU context (fxsave).
2786; @param A1 Pointer to a IEMFPURESULTTWO for the output.
2787; @param A2 Pointer to the 80-bit value.
2788;
2789%macro IEMIMPL_FPU_R80_R80 1
2790BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2791 PROLOGUE_3_ARGS
2792 sub xSP, 20h ; 20h bytes of stack scratch - presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (TODO confirm).
2793
2794 fninit ; Reset the FPU so the register stack is empty.
2795 fld tword [A2] ; ST0 = the input value.
2796 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; Presumably applies FCW/FSW bits from the context in A0 - see the macro.
2797 %1
2798
2799 fnstsw word [A1 + IEMFPURESULTTWO.FSW] ; Capture the status word before exceptions are cleared.
2800 fnclex ; Clear pending exceptions before the first store.
2801 fstp tword [A1 + IEMFPURESULTTWO.r80Result2] ; Top of stack is the second result.
2802 fnclex ; Clear again before the second store.
2803 fstp tword [A1 + IEMFPURESULTTWO.r80Result1] ; Next stack entry is the first result.
2804
2805 fninit ; Leave the FPU in its reset state.
2806 add xSP, 20h
2807 EPILOGUE_3_ARGS
2808ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2809%endmacro
2810
2811IEMIMPL_FPU_R80_R80 fptan
2812IEMIMPL_FPU_R80_R80 fxtract
2813IEMIMPL_FPU_R80_R80 fsincos
2814
2815
2816
2817
2818;---------------------- SSE and MMX Operations ----------------------
2819
2820;; @todo what do we need to do for MMX?
; Placeholders invoked at the start and end of each MMX worker below.
; They currently expand to nothing.
2821%macro IEMIMPL_MMX_PROLOGUE 0
2822%endmacro
2823%macro IEMIMPL_MMX_EPILOGUE 0
2824%endmacro
2825
2826;; @todo what do we need to do for SSE?
; Placeholders invoked at the start and end of each SSE worker below.
; They currently expand to nothing.
2827%macro IEMIMPL_SSE_PROLOGUE 0
2828%endmacro
2829%macro IEMIMPL_SSE_EPILOGUE 0
2830%endmacro
2831
2832
2833;;
2834; Media instruction working on two full sized registers.
2835;
2836; Emits two workers per invocation: an MMX one (_u64 suffix, 64-bit operands
; in mm0/mm1) and an SSE one (_u128 suffix, 128-bit operands in xmm0/xmm1).
;
; @param 1 The instruction
2837;
2838; @param A0 FPU context (fxsave).
2839; @param A1 Pointer to the first media register size operand (input/output).
2840; @param A2 Pointer to the second media register size operand (input).
2841;
2842%macro IEMIMPL_MEDIA_F2 1
2843BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2844 PROLOGUE_3_ARGS
2845 IEMIMPL_MMX_PROLOGUE
2846
2847 movq mm0, [A1] ; mm0 = first operand (destination).
2848 movq mm1, [A2] ; mm1 = second operand (source).
2849 %1 mm0, mm1
2850 movq [A1], mm0 ; Write the result back over the first operand.
2851
2852 IEMIMPL_MMX_EPILOGUE
2853 EPILOGUE_3_ARGS
2854ENDPROC iemAImpl_ %+ %1 %+ _u64
2855
2856BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2857 PROLOGUE_3_ARGS
2858 IEMIMPL_SSE_PROLOGUE
2859
2860 movdqu xmm0, [A1] ; xmm0 = first operand; unaligned load, so no alignment is required of A1.
2861 movdqu xmm1, [A2] ; xmm1 = second operand (source).
2862 %1 xmm0, xmm1
2863 movdqu [A1], xmm0 ; Write the result back over the first operand.
2864
2865 IEMIMPL_SSE_EPILOGUE
2866 EPILOGUE_3_ARGS
2867ENDPROC iemAImpl_ %+ %1 %+ _u128
2868%endmacro
2869
2870IEMIMPL_MEDIA_F2 pxor
2871IEMIMPL_MEDIA_F2 pcmpeqb
2872IEMIMPL_MEDIA_F2 pcmpeqw
2873IEMIMPL_MEDIA_F2 pcmpeqd
2874
2875
2876;;
2877; Media instruction working on one full sized and one half sized register (lower half).
2878;
2879; Only half a register's worth of data is read through A2: 32 bits in the MMX
; worker and 64 bits in the SSE worker.
;
; @param 1 The instruction
2880; @param 2 1 if MMX is included, 0 if not.
2881;
2882; @param A0 FPU context (fxsave).
2883; @param A1 Pointer to the first full sized media register operand (input/output).
2884; @param A2 Pointer to the second half sized media register operand (input).
2885;
2886%macro IEMIMPL_MEDIA_F1L1 2
2887 %if %2 != 0
2888BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2889 PROLOGUE_3_ARGS
2890 IEMIMPL_MMX_PROLOGUE
2891
2892 movq mm0, [A1] ; mm0 = full 64-bit first operand.
2893 movd mm1, [A2] ; mm1 = 32-bit second operand; movd zeroes the upper half.
2894 %1 mm0, mm1
2895 movq [A1], mm0 ; Write the result back over the first operand.
2896
2897 IEMIMPL_MMX_EPILOGUE
2898 EPILOGUE_3_ARGS
2899ENDPROC iemAImpl_ %+ %1 %+ _u64
2900 %endif
2901
2902BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2903 PROLOGUE_3_ARGS
2904 IEMIMPL_SSE_PROLOGUE
2905
2906 movdqu xmm0, [A1] ; xmm0 = full 128-bit first operand.
2907 movq xmm1, [A2] ; xmm1 = 64-bit second operand; movq zeroes the upper half.
2908 %1 xmm0, xmm1
2909 movdqu [A1], xmm0 ; Write the result back over the first operand.
2910
2911 IEMIMPL_SSE_EPILOGUE
2912 EPILOGUE_3_ARGS
2913ENDPROC iemAImpl_ %+ %1 %+ _u128
2914%endmacro
2915
2916IEMIMPL_MEDIA_F1L1 punpcklbw, 1
2917IEMIMPL_MEDIA_F1L1 punpcklwd, 1
2918IEMIMPL_MEDIA_F1L1 punpckldq, 1
2919IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2920
2921
2922;;
2923; Media instruction working on one full sized and one half sized register (high half).
2924;
2925; Unlike IEMIMPL_MEDIA_F1L1, the whole second operand is loaded, because the
; instruction consumes its upper half. The punpckh* workers must therefore be
; instantiated with this macro: the low-half variant would leave the upper
; half of the source register zero.
;
; @param 1 The instruction
2926; @param 2 1 if MMX is included, 0 if not.
2927;
2928; @param A0 FPU context (fxsave).
2929; @param A1 Pointer to the first full sized media register operand (input/output).
2930; @param A2 Pointer to the second full sized media register operand, where we
2931; will only use the upper half (input).
2932;
2933%macro IEMIMPL_MEDIA_F1H1 2
2934 %if %2 != 0
2935BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2936 PROLOGUE_3_ARGS
2937 IEMIMPL_MMX_PROLOGUE
2938
2939 movq mm0, [A1] ; mm0 = full 64-bit first operand.
2940 movq mm1, [A2] ; mm1 = full 64-bit second operand (upper half is what gets used).
2941 %1 mm0, mm1
2942 movq [A1], mm0 ; Write the result back over the first operand.
2943
2944 IEMIMPL_MMX_EPILOGUE
2945 EPILOGUE_3_ARGS
2946ENDPROC iemAImpl_ %+ %1 %+ _u64
2947 %endif
2948
2949BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2950 PROLOGUE_3_ARGS
2951 IEMIMPL_SSE_PROLOGUE
2952
2953 movdqu xmm0, [A1] ; xmm0 = full 128-bit first operand.
2954 movdqu xmm1, [A2] ; xmm1 = full 128-bit second operand (upper half is what gets used).
2955 %1 xmm0, xmm1
2956 movdqu [A1], xmm0 ; Write the result back over the first operand.
2957
2958 IEMIMPL_SSE_EPILOGUE
2959 EPILOGUE_3_ARGS
2960ENDPROC iemAImpl_ %+ %1 %+ _u128
2961%endmacro
2962
2963IEMIMPL_MEDIA_F1H1 punpckhbw, 1
2964IEMIMPL_MEDIA_F1H1 punpckhwd, 1
2965IEMIMPL_MEDIA_F1H1 punpckhdq, 1
2966IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2967
2968
2969;
2970; Shufflers with evil 8-bit immediates.
2971;
2972
;;
; PSHUFW on MMX registers with the 8-bit immediate supplied at runtime.
;
; The immediate has to be encoded in the instruction itself, so a table of
; 256 'pshufw + ret' stubs (one per immediate value) follows the function
; body, and the stub matching A3 is called.
;
; @param A0 Not referenced by this function.
; @param A1 Pointer to the 64-bit destination operand (loaded into mm0, result stored back).
; @param A2 Pointer to the 64-bit source operand (loaded into mm1).
; @param A3 The immediate, used as the stub table index.
; NOTE(review): assumes A3 is in the range 0..255 with no stray upper bits - confirm against the caller.
;
2973BEGINPROC_FASTCALL iemAImpl_pshufw, 16
2974 PROLOGUE_4_ARGS
2975 IEMIMPL_MMX_PROLOGUE
2976
2977 movq mm0, [A1]
2978 movq mm1, [A2]
2979 lea T0, [A3 + A3*4] ; sizeof(pshufw+ret) == 5
2980 lea T1, [.imm0 xWrtRIP] ; T1 = address of the first stub.
2981 lea T1, [T1 + T0] ; T1 = address of the stub for this immediate.
2982 call T1 ; The stub shuffles mm1 into mm0 and returns here.
2983 movq [A1], mm0
2984
2985 IEMIMPL_MMX_EPILOGUE
2986 EPILOGUE_4_ARGS
2987%assign bImm 0
2988%rep 256
2989.imm %+ bImm:
2990 pshufw mm0, mm1, bImm
2991 ret
2992 %assign bImm bImm + 1
2993%endrep
2994.immEnd: ; 256*5 == 0x500
2995dw 0xfaff + (.immEnd - .imm0) ; will cause warning if entries are too big (0xfaff + 0x500 == 0xffff).
2996dw 0x104ff - (.immEnd - .imm0) ; will cause warning if entries are too small (0x104ff - 0x500 == 0xffff).
2997ENDPROC iemAImpl_pshufw
2998
2999
;;
; SSE shuffle instruction with the 8-bit immediate supplied at runtime.
;
; Same scheme as iemAImpl_pshufw: a table of 256 'instruction + ret' stubs,
; here 6 bytes each, indexed by the immediate.
;
; @param 1 The instruction
;
; @param A0 Not referenced by the generated function.
; @param A1 Pointer to the 128-bit destination operand (loaded into xmm0, result stored back).
; @param A2 Pointer to the 128-bit source operand (loaded into xmm1).
; @param A3 The immediate, used as the stub table index.
; NOTE(review): assumes A3 is in the range 0..255 with no stray upper bits - confirm against the caller.
;
3000%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
3001BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
3002 PROLOGUE_4_ARGS
3003 IEMIMPL_SSE_PROLOGUE
3004
3005 movdqu xmm0, [A1]
3006 movdqu xmm1, [A2]
3007 lea T1, [.imm0 xWrtRIP] ; T1 = address of the first stub.
3008 lea T0, [A3 + A3*2] ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
3009 lea T1, [T1 + T0*2] ; T1 = address of the stub for this immediate.
3010 call T1 ; The stub shuffles xmm1 into xmm0 and returns here.
3011 movdqu [A1], xmm0
3012
3013 IEMIMPL_SSE_EPILOGUE
3014 EPILOGUE_4_ARGS
3015 %assign bImm 0
3016 %rep 256
3017.imm %+ bImm:
3018 %1 xmm0, xmm1, bImm
3019 ret
3020 %assign bImm bImm + 1
3021 %endrep
3022.immEnd: ; 256*6 == 0x600
3023dw 0xf9ff + (.immEnd - .imm0) ; will cause warning if entries are too big (0xf9ff + 0x600 == 0xffff).
3024dw 0x105ff - (.immEnd - .imm0) ; will cause warning if entries are too small (0x105ff - 0x600 == 0xffff).
3025ENDPROC iemAImpl_ %+ %1
3026%endmacro
3027
3028IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
3029IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
3030IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
3031
3032
3033;
3034; Move byte mask.
3035;
3036
;;
; PMOVMSKB with an MMX source register.
;
; @param A0 Not referenced by this function.
; @param A1 Pointer to the destination general register value (output).
; @param A2 Pointer to the 64-bit MMX source operand.
;
3037BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
3038 PROLOGUE_3_ARGS
3039 IEMIMPL_MMX_PROLOGUE
3040
3041 mov T0, [A1] ; NOTE(review): looks redundant, pmovmskb below overwrites T0 - confirm.
3042 movq mm1, [A2]
3043 pmovmskb T0, mm1 ; T0 = mask built from the top bit of each byte in mm1.
3044 mov [A1], T0
3045%ifdef RT_ARCH_X86
3046 mov dword [A1 + 4], 0 ; Clear the upper dword of the destination (presumably T0 is only 32 bits wide here).
3047%endif
3048 IEMIMPL_MMX_EPILOGUE
3049 EPILOGUE_3_ARGS
3050ENDPROC iemAImpl_pmovmskb_u64
3051
;;
; PMOVMSKB with an SSE source register.
;
; @param A0 Not referenced by this function.
; @param A1 Pointer to the destination general register value (output).
; @param A2 Pointer to the 128-bit SSE source operand.
;
3052BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
3053 PROLOGUE_3_ARGS
3054 IEMIMPL_SSE_PROLOGUE
3055
3056 mov T0, [A1] ; NOTE(review): looks redundant, pmovmskb below overwrites T0 - confirm.
3057 movdqu xmm1, [A2] ; Unaligned load, so no alignment is required of A2.
3058 pmovmskb T0, xmm1 ; T0 = mask built from the top bit of each byte in xmm1.
3059 mov [A1], T0
3060%ifdef RT_ARCH_X86
3061 mov dword [A1 + 4], 0 ; Clear the upper dword of the destination (presumably T0 is only 32 bits wide here).
3062%endif
3063 IEMIMPL_SSE_EPILOGUE
3064 EPILOGUE_3_ARGS
3065ENDPROC iemAImpl_pmovmskb_u128
3066
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette