VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 95499

Last change on this file since 95499 was 95499, checked in by vboxsync, 2 years ago

VMM/IEM: [v]pshufb. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 112.5 KB
Line 
1; $Id: IEMAllAImpl.asm 95499 2022-07-04 12:52:29Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;*********************************************************************************************************************************
20;* Header Files *
21;*********************************************************************************************************************************
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;*********************************************************************************************************************************
28;* Defined Constants And Macros *
29;*********************************************************************************************************************************
30
;;
; RET XX / RET wrapper for fastcall.
;
; @param 1  Number of stack argument bytes for the callee to pop on return.
;           Only 32-bit Windows fastcall actually pops its stack arguments;
;           every other target uses a plain RET (caller cleans up / register args).
;
%macro RET_FASTCALL 1
 %ifdef RT_ARCH_X86
  %ifdef RT_OS_WINDOWS
        ret %1                          ; x86 Windows fastcall: callee pops the stack args.
  %else
        ret                             ; x86 non-Windows: caller cleans up.
  %endif
 %else
        ret                             ; AMD64: all args in registers, nothing to pop.
 %endif
%endmacro
45
;;
; NAME for fastcall functions.
;
; Expands to the plain NAME() mangling everywhere except 32-bit Windows,
; where fastcall symbols are decorated as <prefix>Name@cbArgs.
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;  escaping (or whatever the dollar is good for here).  Thus the ugly
;  prefix argument.
;
%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %undef NAME_FASTCALL
  ; Windows x86 fastcall decoration: @Name@cbArgs (a_Prefix supplies the '@' or '$@').
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %endif
%endif
60
;;
; BEGINPROC for fastcall functions.
;
; Emits the format-specific export/global directives for the (possibly
; decorated) symbol and opens the procedure by defining its label.
;
; @param 1  The function name (C).
; @param 2  The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
        export %1=NAME_FASTCALL(%1,%2,$@)       ; PE: export under the decorated name.
 %endif
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
        export NAME(%1) NAME_FASTCALL(%1,%2,$@) ; OMF needs an explicit export record too.
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
        global NAME_FASTCALL(%1,%2,$@)          ; '$' escapes the '@' for yasm (see @todo above).
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
81
82
;
; We employ some macro assembly here to hide the calling convention differences.
;
%ifdef RT_ARCH_AMD64
 ;
 ; AMD64: up to four arguments arrive in registers (A0..A3 below), so the
 ; prologues have nothing to do and the epilogues are plain returns.  The
 ; _EX byte count only matters for the 32-bit stack-argument case.
 ;
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 0
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro

 %ifdef ASM_CALL64_GCC
  ; System V AMD64 ABI: integer args in rdi, rsi, rdx, rcx.
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  ; Microsoft x64 ABI: integer args in rcx, rdx, r8, r9.
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 ; Scratch registers - volatile in both 64-bit conventions, so no saving needed.
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

 %define T2         r10                 ; only AMD64
 %define T2_32      r10d
 %define T2_16      r10w
 %define T2_8       r10b

%else
 ; x86
 ;
 ; 32-bit fastcall: A0/A1 arrive in ecx/edx; A2 and A3 come on the stack and
 ; are loaded into ebx/esi by the prologues.  T1 is edi, so the prologues
 ; save edi (and ebx/esi where used) as these are callee-saved on x86.
 ;
 %macro PROLOGUE_1_ARGS 0
        push    edi                     ; save T1.
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1                      ; pop %1 bytes of stack arguments.
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi                     ; save T1.
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx                     ; save A2's register.
        mov     ebx, [esp + 4 + 4]      ; load A2 from the stack (ret addr + saved ebx above it).
        push    edi                     ; save T1.
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1                      ; pop the stack-passed A2 (and any extra) on return.
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx                     ; save A2's register.
        push    edi                     ; save T1.
        push    esi                     ; save A3's register.
        mov     ebx, [esp + 12 + 4 + 0] ; A2: above ret addr + 3 saved registers.
        mov     esi, [esp + 12 + 4 + 4] ; A3: next stack slot.
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1                      ; pop the stack-passed A2+A3 on return.
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro

 %define A0         ecx                 ; fastcall arg 0.
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx                 ; fastcall arg 1.
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx                 ; stack arg, loaded by PROLOGUE_3/4_ARGS.
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi                 ; stack arg, loaded by PROLOGUE_4_ARGS.
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi                 ; callee-saved; prologues push it.
 %define T1_32      edi
 %define T1_16      di
%endif
271
272
;;
; Load the relevant flags from [%1] if there are undefined flags (%3).
;
; Merges the guest's modified+undefined flag bits into the host EFLAGS so the
; emulated instruction executes with the guest's flag state.
;
; @remarks Clobbers T0, stack. Changes EFLAGS.
; @param A2 The register pointing to the flags.
; @param 1 The parameter (A0..A3) pointing to the eflags.
; @param 2 The set of modified flags.
; @param 3 The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 ;%if (%3) != 0                         ; NOTE: the condition is commented out, so the
        pushf                           ; store current flags  ; load is currently done unconditionally.
        mov     T0_32, [%1]             ; load the guest flags
        and     dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
        and     T0_32, (%2 | %3)        ; select the modified and undefined flags.
        or      [xSP], T0               ; merge guest flags with host flags.
        popf                            ; load the mixed flags.
 ;%endif
%endmacro
292
;;
; Update the flag.
;
; Captures the host EFLAGS produced by the emulated instruction and folds the
; modified/undefined bits back into the guest EFLAGS at [%1].
;
; @remarks Clobbers T0, T1, stack.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf
        pop     T1                      ; T1 = host flags after the instruction.
        mov     T0_32, [%1]             ; flags
        and     T0_32, ~(%2 | %3)       ; clear the modified & undefined flags.
        and     T1_32, (%2 | %3)        ; select the modified and undefined flags.
        or      T0_32, T1_32            ; combine the flags.
        mov     [%1], T0_32             ; save the flags.
 %endif
%endmacro
312
;;
; Calculates the new EFLAGS based on the CPU EFLAGS and fixed clear and set bit masks.
;
; @remarks Clobbers T0, T1, stack.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 Mask of additional flags to always clear
; @param 4 Mask of additional flags to always set.
;
%macro IEM_SAVE_AND_ADJUST_FLAGS 4
 %if (%2 | %3 | %4) != 0
        pushf
        pop     T1                      ; T1 = host flags after the instruction.
        mov     T0_32, [%1]             ; load flags.
        and     T0_32, ~(%2 | %3)       ; clear the modified and always cleared flags.
        and     T1_32, (%2)             ; select the modified flags.
        or      T0_32, T1_32            ; combine the flags.
  %if (%4) != 0
        or      T0_32, %4               ; add the always set flags.
  %endif
        mov     [%1], T0_32             ; save the result.
 %endif
%endmacro
336
;;
; Calculates the new EFLAGS based on the CPU EFLAGS (%2), a clear mask (%3),
; signed input (%4[%5]) and parity index (%6).
;
; This is used by MUL and IMUL, where we got result (%4 & %6) in xAX which is
; also T0. So, we have to use T1 for the EFLAGS calculation and save T0/xAX
; while we extract the %2 flags from the CPU EFLAGS or use T2 (only AMD64).
;
; @remarks Clobbers T0, T1, stack, %6, EFLAGS.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 Mask of additional flags to always clear
; @param 4 The result register to set SF by.
; @param 5 The width of the %4 register in bits (8, 16, 32, or 64).
; @param 6 The (full) register containing the parity table index. Will be modified!

%macro IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF 6
 %ifdef RT_ARCH_AMD64
        pushf
        pop     T2                      ; T2 = host flags (T0/xAX holds the result, keep it).
 %else
        push    T0                      ; x86 has no T2: temporarily free T0 for the host flags.
        pushf
        pop     T0
 %endif
        mov     T1_32, [%1]             ; load flags.
        and     T1_32, ~(%2 | %3 | X86_EFL_PF | X86_EFL_SF) ; clear the modified, always cleared flags and the two flags we calc.
 %ifdef RT_ARCH_AMD64
        and     T2_32, (%2)             ; select the modified flags.
        or      T1_32, T2_32            ; combine the flags.
 %else
        and     T0_32, (%2)             ; select the modified flags.
        or      T1_32, T0_32            ; combine the flags.
        pop     T0                      ; restore the saved result register.
 %endif

        ; First calculate SF as it's likely to be referring to the same register as %6 does.
        bt      %4, %5 - 1              ; CF = sign bit of the result.
        jnc     %%sf_clear
        or      T1_32, X86_EFL_SF
 %%sf_clear:

        ; Parity last.
        and     %6, 0xff                ; PF is calculated from the low result byte only.
 %ifdef RT_ARCH_AMD64
        lea     T2, [NAME(g_afParity) xWrtRIP] ; RIP-relative; table lookup needs the base in a register.
        or      T1_8, [T2 + %6]
 %else
        or      T1_8, [NAME(g_afParity) + %6]
 %endif

        mov     [%1], T1_32             ; save the result.
%endmacro
390
;;
; Calculates the new EFLAGS using fixed clear and set bit masks.
;
; Does not read the host EFLAGS at all - purely rewrites bits in the guest copy.
;
; @remarks Clobbers T0.
; @param 1 The register pointing to the EFLAGS.
; @param 2 Mask of additional flags to always clear
; @param 3 Mask of additional flags to always set.
;
%macro IEM_ADJUST_FLAGS 3
 %if (%2 | %3) != 0
        mov     T0_32, [%1]             ; Load flags.
  %if (%2) != 0
        and     T0_32, ~(%2)            ; Remove the always cleared flags.
  %endif
  %if (%3) != 0
        or      T0_32, %3               ; Add the always set flags.
  %endif
        mov     [%1], T0_32             ; Save the result.
 %endif
%endmacro
411
;;
; Calculates the new EFLAGS using fixed clear and set bit masks.
;
; Like IEM_ADJUST_FLAGS, but additionally computes PF from the low byte of %4
; via the g_afParity lookup table.
;
; @remarks Clobbers T0, T2 (AMD64 only), %4, EFLAGS.
; @param 1 The register pointing to the EFLAGS.
; @param 2 Mask of additional flags to always clear
; @param 3 Mask of additional flags to always set.
; @param 4 The (full) register containing the parity table index. Will be modified!
;
%macro IEM_ADJUST_FLAGS_WITH_PARITY 4
        mov     T0_32, [%1]             ; Load flags.
        and     T0_32, ~(%2 | X86_EFL_PF) ; Remove PF and the always cleared flags.
 %if (%3) != 0
        or      T0_32, %3               ; Add the always set flags.
 %endif
        and     %4, 0xff                ; PF depends on the low byte only.
 %ifdef RT_ARCH_AMD64
        lea     T2, [NAME(g_afParity) xWrtRIP] ; RIP-relative table access.
        or      T0_8, [T2 + %4]
 %else
        or      T0_8, [NAME(g_afParity) + %4]
 %endif
        mov     [%1], T0_32             ; Save the result.
%endmacro
436
437
438;*********************************************************************************************************************************
439;* External Symbols *
440;*********************************************************************************************************************************
441extern NAME(g_afParity)
442
443
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param 1 The instruction mnemonic.
; @param 2 Non-zero if there should be a locked version.
; @param 3 The modified flags.
; @param 4 The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8         ; execute the operation on the guest operand.
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8         ; atomic variant for LOCK-prefixed guest code.
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;            instr,lock, modified-flags,                                                                undefined flags
IEMIMPL_BIN_OP add,  1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF),              X86_EFL_AF
IEMIMPL_BIN_OP xor,  1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF),              X86_EFL_AF
IEMIMPL_BIN_OP and,  1,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF),              X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF),              X86_EFL_AF
543
544
;;
; Macro for implementing a binary operator, VEX variant with separate input/output.
;
; This will generate code for the 32 and 64 bit accesses, except on 32-bit system
; where the 64-bit accesses requires hand coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the first source register operand in A1, the second source register operand
; in A2 and a pointer to eflags in A3.
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
;
%macro IEMIMPL_VEX_BIN_OP 3
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        %1      T0_32, A1_32, A2_32     ; three-operand VEX form: result goes to T0.
        mov     [A0], T0_32
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        %1      T0, A1, A2
        mov     [A0], T0
        IEM_SAVE_FLAGS A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro

;                  instr,  modified-flags,                                       undefined-flags
IEMIMPL_VEX_BIN_OP andn,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF),  (X86_EFL_AF | X86_EFL_PF)
IEMIMPL_VEX_BIN_OP bextr,  (X86_EFL_OF | X86_EFL_ZF | X86_EFL_CF),               (X86_EFL_SF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_VEX_BIN_OP bzhi,   (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF),  (X86_EFL_AF | X86_EFL_PF)
585
;;
; Macro for implementing BLSR, BLCMSK and BLSI (fallbacks implemented in C).
;
; This will generate code for the 32 and 64 bit accesses, except on 32-bit system
; where the 64-bit accesses requires hand coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
;
%macro IEMIMPL_VEX_BIN_OP_2 3
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     T0_32, [A0]             ; destination doubles as the first source.
        %1      T0_32, A1_32
        mov     [A0], T0_32
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     T0, [A0]
        %1      T0, A1
        mov     [A0], T0
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro

;                    instr,   modified-flags,                                      undefined-flags
IEMIMPL_VEX_BIN_OP_2 blsr,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)
IEMIMPL_VEX_BIN_OP_2 blsmsk,  (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)
IEMIMPL_VEX_BIN_OP_2 blsi,    (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)
627
628
;;
; Macro for implementing a binary operator w/o flags, VEX variant with separate input/output.
;
; This will generate code for the 32 and 64 bit accesses, except on 32-bit system
; where the 64-bit accesses requires hand coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the first source register operand in A1 and the second source register
; operand (the shift count for the fallbacks) in A2.  No EFLAGS pointer - these
; instructions leave the flags alone.
;
; @param 1 The instruction mnemonic.
; @param 2 Fallback instruction if applicable.
; @param 3 Whether to emit fallback or not.
;
%macro IEMIMPL_VEX_BIN_OP_NOEFL 3
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        %1      T0_32, A1_32, A2_32
        mov     [A0], T0_32
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %if %3
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_fallback, 12
        PROLOGUE_3_ARGS
  %ifdef ASM_CALL64_GCC
        mov     cl, A2_8                ; the legacy shift needs the count in cl.
        %2      A1_32, cl
        mov     [A0], A1_32
  %else
        xchg    A2, A0                  ; MSC: A0 is rcx; swap so cl = count and A2 = dst ptr.
        %2      A1_32, cl
        mov     [A2], A1_32
  %endif
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_fallback
 %endif

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        %1      T0, A1, A2
        mov     [A0], T0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

  %if %3
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_fallback, 12
        PROLOGUE_3_ARGS
   %ifdef ASM_CALL64_GCC
        mov     cl, A2_8                ; count into cl for the legacy shift.
        %2      A1, cl
        mov     [A0], A1                ; Fixed: store the full 64-bit result (was a truncating 32-bit store).
   %else
        xchg    A2, A0                  ; cl = count, A2 = destination pointer.
        %2      A1, cl
        mov     [A2], A1                ; Fixed: full 64-bit store via A2; after the xchg A0 holds
                                        ; the count, so the previous trailing 'mov [A0], A1' was a
                                        ; wild write and has been removed.
   %endif
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_fallback
  %endif
 %endif ; RT_ARCH_AMD64
%endmacro

;                        instr, fallback instr, emit fallback
IEMIMPL_VEX_BIN_OP_NOEFL sarx,  sar,            1
IEMIMPL_VEX_BIN_OP_NOEFL shlx,  shl,            1
IEMIMPL_VEX_BIN_OP_NOEFL shrx,  shr,            1
IEMIMPL_VEX_BIN_OP_NOEFL pdep,  nop,            0
IEMIMPL_VEX_BIN_OP_NOEFL pext,  nop,            0
700
701
;
; RORX uses an immediate byte for the shift count, so we only do a
; fallback implementation of that one (rotating via cl instead).
;
BEGINPROC_FASTCALL iemAImpl_rorx_u32, 12
        PROLOGUE_3_ARGS
 %ifdef ASM_CALL64_GCC
        mov     cl, A2_8                ; count into cl (A2 = rdx, cl is free here).
        ror     A1_32, cl
        mov     [A0], A1_32
 %else
        xchg    A2, A0                  ; MSC: A0 is rcx; swap so cl = count and A2 = dst ptr.
        ror     A1_32, cl
        mov     [A2], A1_32
 %endif
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_rorx_u32
719
720 %ifdef RT_ARCH_AMD64
721BEGINPROC_FASTCALL iemAImpl_rorx_u64, 12
722 PROLOGUE_3_ARGS
723 %ifdef ASM_CALL64_GCC
724 mov cl, A2_8
725 ror A1, cl
726 mov [A0], A1_32
727 %else
728 xchg A2, A0
729 ror A1, cl
730 mov [A2], A1_32
731 %endif
732 mov [A0], A1
733 EPILOGUE_3_ARGS
734ENDPROC iemAImpl_rorx_u64
735 %endif ; RT_ARCH_AMD64
736
737
;
; MULX
;
; Stores the low half of the product at [A1] and the high half at [A0].
;
BEGINPROC_FASTCALL iemAImpl_mulx_u32, 16
        PROLOGUE_4_ARGS
%ifdef ASM_CALL64_GCC
        ; A2_32 is EDX - perfect (mulx takes its implicit first factor from edx).
        mulx    T0_32, T1_32, A3_32
        mov     [A1], T1_32             ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], T0_32
%else
        ; A1 is xDX - must switch A1 and A2, so EDX=uSrc1
        xchg    A1, A2
        mulx    T0_32, T1_32, A3_32
        mov     [A2], T1_32             ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], T0_32
%endif
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_mulx_u32
757
758
;
; MULX fallback using plain MUL (edx:eax = eax * src) for CPUs without BMI2.
;
BEGINPROC_FASTCALL iemAImpl_mulx_u32_fallback, 16
        PROLOGUE_4_ARGS
%ifdef ASM_CALL64_GCC
        ; A2_32 is EDX, T0_32 is EAX
        mov     eax, A3_32
        mul     A2_32                   ; edx:eax = eax * A2_32.
        mov     [A1], eax               ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], edx
%else
        ; A1 is xDX, T0_32 is EAX - must switch A1 and A2, so EDX=uSrc1
        xchg    A1, A2
        mov     eax, A3_32
        mul     A2_32
        mov     [A2], eax               ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], edx
%endif
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_mulx_u32_fallback
777
%ifdef RT_ARCH_AMD64
;
; 64-bit MULX and its MUL-based fallback (AMD64 only; 32-bit hosts use C code).
; Low half of the product goes to [A1], high half to [A0].
;
BEGINPROC_FASTCALL iemAImpl_mulx_u64, 16
        PROLOGUE_4_ARGS
%ifdef ASM_CALL64_GCC
        ; A2 is RDX - perfect (mulx takes its implicit first factor from rdx).
        mulx    T0, T1, A3
        mov     [A1], T1                ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], T0
%else
        ; A1 is xDX - must switch A1 and A2, so RDX=uSrc1
        xchg    A1, A2
        mulx    T0, T1, A3
        mov     [A2], T1                ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], T0
%endif
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_mulx_u64


BEGINPROC_FASTCALL iemAImpl_mulx_u64_fallback, 16
        PROLOGUE_4_ARGS
%ifdef ASM_CALL64_GCC
        ; A2 is RDX, T0 is RAX
        mov     rax, A3
        mul     A2                      ; rdx:rax = rax * A2.
        mov     [A1], rax               ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], rdx
%else
        ; A1 is xDX, T0 is RAX - must switch A1 and A2, so RDX=uSrc1
        xchg    A1, A2
        mov     rax, A3
        mul     A2
        mov     [A2], rax               ; Low value first, as we should return the high part if same destination registers.
        mov     [A0], rdx
%endif
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_mulx_u64_fallback

%endif
817
818
;;
; Macro for implementing a bit operator.
;
; This will generate code for the 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param 1 The instruction mnemonic.
; @param 2 Non-zero if there should be a locked version.
; @param 3 The modified flags.
; @param 4 The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16        ; note: no 8-bit form exists for these instructions.
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

; bt has no locked form (it only reads), the others do.
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
895
;;
; Macro for implementing a bit search operator.
;
; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
; system where the 64-bit accesses requires hand coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; In the ZF case the destination register is 'undefined', however it seems that
; both AMD and Intel just leaves it as is.  The undefined EFLAGS differs between
; AMD and Intel and according to https://www.sandpile.org/x86/flags.htm between
; Intel microarchitectures.  We only implement 'intel' and 'amd' variation with
; the behaviour of more recent CPUs (Intel 10980X and AMD 3990X).
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
; @param 4 Non-zero if destination isn't written when ZF=1. Zero if always written.
;
%macro IEMIMPL_BIT_OP2 4
BEGINCODE
; Native-flags variant: pass the host's flag behaviour straight through.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
%if %4 != 0
        jz      .unchanged_dst          ; ZF=1: source was zero, leave destination untouched.
%endif
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; Intel variant: clears OF/SF/AF/CF (and ZF on the written path), sets PF from the result.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _intel, 12
        PROLOGUE_3_ARGS
        %1      T1_16, A1_16
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T1_16
        IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
        EPILOGUE_3_ARGS
.unchanged_dst:
        ; Source was zero: ZF and PF are set, the rest cleared.
        IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_intel

; AMD variant: only ZF is taken from the CPU result.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _amd, 12
        PROLOGUE_3_ARGS
        %1      T0_16, A1_16
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0  ; Only the ZF flag is modified on AMD Zen 2.
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_amd


BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _intel, 12
        PROLOGUE_3_ARGS
        %1      T1_32, A1_32
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T1_32
        IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
        EPILOGUE_3_ARGS
.unchanged_dst:
        IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_intel

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _amd, 12
        PROLOGUE_3_ARGS
        %1      T0_32, A1_32
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0  ; Only the ZF flag is modified on AMD Zen 2.
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_amd


 %ifdef RT_ARCH_AMD64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _intel, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T1, A1
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T1
        IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
        EPILOGUE_3_ARGS
.unchanged_dst:
        IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_intel

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _amd, 16
        PROLOGUE_3_ARGS
        %1      T0, A1
%if %4 != 0
        jz      .unchanged_dst
%endif
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0  ; Only the ZF flag is modified on AMD Zen 2.
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_amd

 %endif ; RT_ARCH_AMD64
%endmacro

; bsf/bsr leave the destination untouched on zero input (%4=1); tzcnt/lzcnt always write.
IEMIMPL_BIT_OP2 bsf,   (X86_EFL_ZF),              (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
IEMIMPL_BIT_OP2 bsr,   (X86_EFL_ZF),              (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
IEMIMPL_BIT_OP2 tzcnt, (X86_EFL_ZF | X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF),              0
IEMIMPL_BIT_OP2 lzcnt, (X86_EFL_ZF | X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF),              0
1047
1048
;;
; Macro for implementing POPCNT.
;
; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
; system where the 64-bit accesses requires hand coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; ASSUMES Intel and AMD set EFLAGS the same way.
;
; ASSUMES the instruction does not support memory destination.
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
;
%macro IEMIMPL_BIT_OP3 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16            ; register destination only; result stored via T0.
        mov     [A0], T0_16
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        mov     [A0], T0_32
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        mov     [A0], T0
        IEM_SAVE_FLAGS A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro
IEMIMPL_BIT_OP3 popcnt, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF), 0
1098
1099
1100;
1101; IMUL is also a similar but yet different case (no lock, no mem dst).
1102; The rDX:rAX variant of imul is handled together with mul further down.
1103;
1104BEGINCODE
1105; @param 1 EFLAGS that are modified.
1106; @param 2 Undefined EFLAGS.
1107; @param 3 Function suffix.
1108; @param 4 EFLAGS variation: 0 for native, 1 for intel (ignored),
1109; 2 for AMD (set AF, clear PF, ZF and SF).
1110%macro IEMIMPL_IMUL_TWO 4
1111BEGINPROC_FASTCALL iemAImpl_imul_two_u16 %+ %3, 12
1112 PROLOGUE_3_ARGS
1113 IEM_MAYBE_LOAD_FLAGS A2, %1, %2
1114 imul A1_16, word [A0]
1115 mov [A0], A1_16
1116 %if %4 != 1
1117 IEM_SAVE_FLAGS A2, %1, %2
1118 %else
1119 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_16, 16, A1
1120 %endif
1121 EPILOGUE_3_ARGS
1122ENDPROC iemAImpl_imul_two_u16 %+ %3
1123
1124BEGINPROC_FASTCALL iemAImpl_imul_two_u32 %+ %3, 12
1125 PROLOGUE_3_ARGS
1126 IEM_MAYBE_LOAD_FLAGS A2, %1, %2
1127 imul A1_32, dword [A0]
1128 mov [A0], A1_32
1129 %if %4 != 1
1130 IEM_SAVE_FLAGS A2, %1, %2
1131 %else
1132 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_32, 32, A1
1133 %endif
1134 EPILOGUE_3_ARGS
1135ENDPROC iemAImpl_imul_two_u32 %+ %3
1136
1137 %ifdef RT_ARCH_AMD64
1138BEGINPROC_FASTCALL iemAImpl_imul_two_u64 %+ %3, 16
1139 PROLOGUE_3_ARGS
1140 IEM_MAYBE_LOAD_FLAGS A2, %1, %2
1141 imul A1, qword [A0]
1142 mov [A0], A1
1143 %if %4 != 1
1144 IEM_SAVE_FLAGS A2, %1, %2
1145 %else
1146 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1, 64, A1
1147 %endif
1148 EPILOGUE_3_ARGS_EX 8
1149ENDPROC iemAImpl_imul_two_u64 %+ %3
1150 %endif ; RT_ARCH_AMD64
1151%endmacro
1152IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, , 0
1153IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _intel, 1
1154IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _amd, 2
1155
1156
1157;
1158; XCHG for memory operands. This implies locking. No flag changes.
1159;
1160; Each function takes two arguments, first the pointer to the memory,
1161; then the pointer to the register. They all return void.
1162;
1163BEGINCODE
1164BEGINPROC_FASTCALL iemAImpl_xchg_u8_locked, 8
1165 PROLOGUE_2_ARGS
1166 mov T0_8, [A1]
1167 xchg [A0], T0_8
1168 mov [A1], T0_8
1169 EPILOGUE_2_ARGS
1170ENDPROC iemAImpl_xchg_u8_locked
1171
1172BEGINPROC_FASTCALL iemAImpl_xchg_u16_locked, 8
1173 PROLOGUE_2_ARGS
1174 mov T0_16, [A1]
1175 xchg [A0], T0_16
1176 mov [A1], T0_16
1177 EPILOGUE_2_ARGS
1178ENDPROC iemAImpl_xchg_u16_locked
1179
1180BEGINPROC_FASTCALL iemAImpl_xchg_u32_locked, 8
1181 PROLOGUE_2_ARGS
1182 mov T0_32, [A1]
1183 xchg [A0], T0_32
1184 mov [A1], T0_32
1185 EPILOGUE_2_ARGS
1186ENDPROC iemAImpl_xchg_u32_locked
1187
1188%ifdef RT_ARCH_AMD64
1189BEGINPROC_FASTCALL iemAImpl_xchg_u64_locked, 8
1190 PROLOGUE_2_ARGS
1191 mov T0, [A1]
1192 xchg [A0], T0
1193 mov [A1], T0
1194 EPILOGUE_2_ARGS
1195ENDPROC iemAImpl_xchg_u64_locked
1196%endif
1197
; Unlocked variants for fDisregardLock mode.
;
; These do the exchange as two plain loads followed by two plain stores, i.e.
; non-atomically, avoiding the implicit LOCK of a memory xchg.

BEGINPROC_FASTCALL iemAImpl_xchg_u8_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; T0 = register value
        mov     T1_8, [A0]              ; T1 = memory value
        mov     [A0], T0_8
        mov     [A1], T1_8
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8_unlocked

BEGINPROC_FASTCALL iemAImpl_xchg_u16_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        mov     T1_16, [A0]
        mov     [A0], T0_16
        mov     [A1], T1_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16_unlocked

BEGINPROC_FASTCALL iemAImpl_xchg_u32_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        mov     T1_32, [A0]
        mov     [A0], T0_32
        mov     [A1], T1_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32_unlocked

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xchg_u64_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        mov     T1, [A0]
        mov     [A0], T0
        mov     [A1], T1
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64_unlocked
%endif
1237
1238
1239;
1240; XADD for memory operands.
1241;
1242; Each function takes three arguments, first the pointer to the
1243; memory/register, then the pointer to the register, and finally a pointer to
1244; eflags. They all return void.
1245;
1246BEGINCODE
1247BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
1248 PROLOGUE_3_ARGS
1249 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1250 mov T0_8, [A1]
1251 xadd [A0], T0_8
1252 mov [A1], T0_8
1253 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1254 EPILOGUE_3_ARGS
1255ENDPROC iemAImpl_xadd_u8
1256
1257BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
1258 PROLOGUE_3_ARGS
1259 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1260 mov T0_16, [A1]
1261 xadd [A0], T0_16
1262 mov [A1], T0_16
1263 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1264 EPILOGUE_3_ARGS
1265ENDPROC iemAImpl_xadd_u16
1266
1267BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
1268 PROLOGUE_3_ARGS
1269 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1270 mov T0_32, [A1]
1271 xadd [A0], T0_32
1272 mov [A1], T0_32
1273 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1274 EPILOGUE_3_ARGS
1275ENDPROC iemAImpl_xadd_u32
1276
1277%ifdef RT_ARCH_AMD64
1278BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
1279 PROLOGUE_3_ARGS
1280 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1281 mov T0, [A1]
1282 xadd [A0], T0
1283 mov [A1], T0
1284 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1285 EPILOGUE_3_ARGS
1286ENDPROC iemAImpl_xadd_u64
1287%endif ; RT_ARCH_AMD64
1288
; Locked XADD variants - identical to the unlocked ones above except for the
; LOCK prefix on the xadd itself.
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12       ; Note: AMD64-only, so the fastcall byte count is not used for decoration.
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64_locked
%endif ; RT_ARCH_AMD64
1330
1331
1332;
1333; CMPXCHG8B.
1334;
1335; These are tricky register wise, so the code is duplicated for each calling
1336; convention.
1337;
1338; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
1339;
1340; C-proto:
1341; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
1342; uint32_t *pEFlags));
1343;
1344; Note! Identical to iemAImpl_cmpxchg16b.
1345;
1346BEGINCODE
1347BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
1348%ifdef RT_ARCH_AMD64
1349 %ifdef ASM_CALL64_MSC
1350 push rbx
1351
1352 mov r11, rdx ; pu64EaxEdx (is also T1)
1353 mov r10, rcx ; pu64Dst
1354
1355 mov ebx, [r8]
1356 mov ecx, [r8 + 4]
1357 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1358 mov eax, [r11]
1359 mov edx, [r11 + 4]
1360
1361 lock cmpxchg8b [r10]
1362
1363 mov [r11], eax
1364 mov [r11 + 4], edx
1365 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1366
1367 pop rbx
1368 ret
1369 %else
1370 push rbx
1371
1372 mov r10, rcx ; pEFlags
1373 mov r11, rdx ; pu64EbxEcx (is also T1)
1374
1375 mov ebx, [r11]
1376 mov ecx, [r11 + 4]
1377 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1378 mov eax, [rsi]
1379 mov edx, [rsi + 4]
1380
1381 lock cmpxchg8b [rdi]
1382
1383 mov [rsi], eax
1384 mov [rsi + 4], edx
1385 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1386
1387 pop rbx
1388 ret
1389
1390 %endif
1391%else
1392 push esi
1393 push edi
1394 push ebx
1395 push ebp
1396
1397 mov edi, ecx ; pu64Dst
1398 mov esi, edx ; pu64EaxEdx
1399 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
1400 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
1401
1402 mov ebx, [ecx]
1403 mov ecx, [ecx + 4]
1404 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1405 mov eax, [esi]
1406 mov edx, [esi + 4]
1407
1408 lock cmpxchg8b [edi]
1409
1410 mov [esi], eax
1411 mov [esi + 4], edx
1412 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
1413
1414 pop ebp
1415 pop ebx
1416 pop edi
1417 pop esi
1418 ret 8
1419%endif
1420ENDPROC iemAImpl_cmpxchg8b
1421
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg8b, so the locked variant is
        ; just a tail jump to the common implementation above.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
1426
%ifdef RT_ARCH_AMD64

;
; CMPXCHG16B.
;
; These are tricky register wise, so the code is duplicated for each calling
; convention:  cmpxchg16b has fixed register operands (RDX:RAX compare value,
; RCX:RBX replacement value), which collide with argument registers.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
; uint32_t *pEFlags));
;
; Note! Identical to iemAImpl_cmpxchg8b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
 %ifdef ASM_CALL64_MSC
        ; MSC: rcx=pu128Dst, rdx=pu128RaxRdx, r8=pu128RbxRcx, r9=pEFlags
        push    rbx                     ; callee-saved, needed for the replacement value

        mov     r11, rdx                ; pu64RaxRdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     rbx, [r8]
        mov     rcx, [r8 + 8]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [r11]
        mov     rdx, [r11 + 8]

        lock cmpxchg16b [r10]

        mov     [r11], rax              ; store back RDX:RAX (old value on mismatch)
        mov     [r11 + 8], rdx
        IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; GCC/SysV: rdi=pu128Dst, rsi=pu128RaxRdx, rdx=pu128RbxRcx, rcx=pEFlags
        push    rbx                     ; callee-saved, needed for the replacement value

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64RbxRcx (is also T1)

        mov     rbx, [r11]
        mov     rcx, [r11 + 8]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [rsi]
        mov     rdx, [rsi + 8]

        lock cmpxchg16b [rdi]

        mov     [rsi], rax              ; store back RDX:RAX (old value on mismatch)
        mov     [rsi + 8], rdx
        IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
ENDPROC iemAImpl_cmpxchg16b

BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg16b, so the locked variant is
        ; just a tail jump to the common implementation above.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
ENDPROC iemAImpl_cmpxchg16b_locked

%endif ; RT_ARCH_AMD64
1495
1496
1497;
1498; CMPXCHG.
1499;
1500; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
1501;
1502; C-proto:
1503; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t puEax, uintX_t uReg, uint32_t *pEFlags));
1504;
1505BEGINCODE
1506%macro IEMIMPL_CMPXCHG 2
1507BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
1508 PROLOGUE_4_ARGS
1509 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
1510 mov al, [A1]
1511 %1 cmpxchg [A0], A2_8
1512 mov [A1], al
1513 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
1514 EPILOGUE_4_ARGS
1515ENDPROC iemAImpl_cmpxchg_u8 %+ %2
1516
1517BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
1518 PROLOGUE_4_ARGS
1519 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
1520 mov ax, [A1]
1521 %1 cmpxchg [A0], A2_16
1522 mov [A1], ax
1523 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
1524 EPILOGUE_4_ARGS
1525ENDPROC iemAImpl_cmpxchg_u16 %+ %2
1526
1527BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
1528 PROLOGUE_4_ARGS
1529 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
1530 mov eax, [A1]
1531 %1 cmpxchg [A0], A2_32
1532 mov [A1], eax
1533 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
1534 EPILOGUE_4_ARGS
1535ENDPROC iemAImpl_cmpxchg_u32 %+ %2
1536
1537BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
1538%ifdef RT_ARCH_AMD64
1539 PROLOGUE_4_ARGS
1540 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
1541 mov rax, [A1]
1542 %1 cmpxchg [A0], A2
1543 mov [A1], rax
1544 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
1545 EPILOGUE_4_ARGS
1546%else
1547 ;
1548 ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
1549 ;
1550 push esi
1551 push edi
1552 push ebx
1553 push ebp
1554
1555 mov edi, ecx ; pu64Dst
1556 mov esi, edx ; pu64Rax
1557 mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
1558 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
1559
1560 mov ebx, [ecx]
1561 mov ecx, [ecx + 4]
1562 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
1563 mov eax, [esi]
1564 mov edx, [esi + 4]
1565
1566 lock cmpxchg8b [edi]
1567
1568 ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
1569 jz .cmpxchg8b_not_equal
1570 cmp eax, eax ; just set the other flags.
1571.store:
1572 mov [esi], eax
1573 mov [esi + 4], edx
1574 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
1575
1576 pop ebp
1577 pop ebx
1578 pop edi
1579 pop esi
1580 ret 8
1581
1582.cmpxchg8b_not_equal:
1583 cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
1584 jne .store
1585 cmp [esi], eax
1586 jmp .store
1587
1588%endif
1589ENDPROC iemAImpl_cmpxchg_u64 %+ %2
1590%endmacro ; IEMIMPL_CMPXCHG
1591
1592IEMIMPL_CMPXCHG , ,
1593IEMIMPL_CMPXCHG lock, _locked
1594
1595;;
1596; Macro for implementing a unary operator.
1597;
1598; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
1599; variants, except on 32-bit system where the 64-bit accesses requires hand
1600; coding.
1601;
1602; All the functions takes a pointer to the destination memory operand in A0,
1603; the source register operand in A1 and a pointer to eflags in A2.
1604;
1605; @param 1 The instruction mnemonic.
1606; @param 2 The modified flags.
1607; @param 3 The undefined flags.
1608;
1609%macro IEMIMPL_UNARY_OP 3
1610BEGINCODE
1611BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
1612 PROLOGUE_2_ARGS
1613 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1614 %1 byte [A0]
1615 IEM_SAVE_FLAGS A1, %2, %3
1616 EPILOGUE_2_ARGS
1617ENDPROC iemAImpl_ %+ %1 %+ _u8
1618
1619BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
1620 PROLOGUE_2_ARGS
1621 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1622 lock %1 byte [A0]
1623 IEM_SAVE_FLAGS A1, %2, %3
1624 EPILOGUE_2_ARGS
1625ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
1626
1627BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
1628 PROLOGUE_2_ARGS
1629 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1630 %1 word [A0]
1631 IEM_SAVE_FLAGS A1, %2, %3
1632 EPILOGUE_2_ARGS
1633ENDPROC iemAImpl_ %+ %1 %+ _u16
1634
1635BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
1636 PROLOGUE_2_ARGS
1637 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1638 lock %1 word [A0]
1639 IEM_SAVE_FLAGS A1, %2, %3
1640 EPILOGUE_2_ARGS
1641ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
1642
1643BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
1644 PROLOGUE_2_ARGS
1645 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1646 %1 dword [A0]
1647 IEM_SAVE_FLAGS A1, %2, %3
1648 EPILOGUE_2_ARGS
1649ENDPROC iemAImpl_ %+ %1 %+ _u32
1650
1651BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
1652 PROLOGUE_2_ARGS
1653 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1654 lock %1 dword [A0]
1655 IEM_SAVE_FLAGS A1, %2, %3
1656 EPILOGUE_2_ARGS
1657ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1658
1659 %ifdef RT_ARCH_AMD64
1660BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1661 PROLOGUE_2_ARGS
1662 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1663 %1 qword [A0]
1664 IEM_SAVE_FLAGS A1, %2, %3
1665 EPILOGUE_2_ARGS
1666ENDPROC iemAImpl_ %+ %1 %+ _u64
1667
1668BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1669 PROLOGUE_2_ARGS
1670 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1671 lock %1 qword [A0]
1672 IEM_SAVE_FLAGS A1, %2, %3
1673 EPILOGUE_2_ARGS
1674ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1675 %endif ; RT_ARCH_AMD64
1676
1677%endmacro
1678
1679IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1680IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1681IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1682IEMIMPL_UNARY_OP not, 0, 0
1683
1684
1685;
1686; BSWAP. No flag changes.
1687;
1688; Each function takes one argument, pointer to the value to bswap
1689; (input/output). They all return void.
1690;
1691BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
1692 PROLOGUE_1_ARGS
1693 mov T0_32, [A0] ; just in case any of the upper bits are used.
1694 db 66h
1695 bswap T0_32
1696 mov [A0], T0_32
1697 EPILOGUE_1_ARGS
1698ENDPROC iemAImpl_bswap_u16
1699
1700BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
1701 PROLOGUE_1_ARGS
1702 mov T0_32, [A0]
1703 bswap T0_32
1704 mov [A0], T0_32
1705 EPILOGUE_1_ARGS
1706ENDPROC iemAImpl_bswap_u32
1707
1708BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
1709%ifdef RT_ARCH_AMD64
1710 PROLOGUE_1_ARGS
1711 mov T0, [A0]
1712 bswap T0
1713 mov [A0], T0
1714 EPILOGUE_1_ARGS
1715%else
1716 PROLOGUE_1_ARGS
1717 mov T0, [A0]
1718 mov T1, [A0 + 4]
1719 bswap T0
1720 bswap T1
1721 mov [A0 + 4], T0
1722 mov [A0], T1
1723 EPILOGUE_1_ARGS
1724%endif
1725ENDPROC iemAImpl_bswap_u64
1726
1727
1728;;
1729; Macro for implementing a shift operation.
1730;
1731; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1732; 32-bit system where the 64-bit accesses requires hand coding.
1733;
1734; All the functions takes a pointer to the destination memory operand in A0,
1735; the shift count in A1 and a pointer to eflags in A2.
1736;
1737; @param 1 The instruction mnemonic.
1738; @param 2 The modified flags.
1739; @param 3 The undefined flags.
1740;
1741; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1742;
1743; @note the _intel and _amd variants are implemented in C.
1744;
1745%macro IEMIMPL_SHIFT_OP 3
1746BEGINCODE
1747BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1748 PROLOGUE_3_ARGS
1749 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1750 %ifdef ASM_CALL64_GCC
1751 mov cl, A1_8
1752 %1 byte [A0], cl
1753 %else
1754 xchg A1, A0
1755 %1 byte [A1], cl
1756 %endif
1757 IEM_SAVE_FLAGS A2, %2, %3
1758 EPILOGUE_3_ARGS
1759ENDPROC iemAImpl_ %+ %1 %+ _u8
1760
1761BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1762 PROLOGUE_3_ARGS
1763 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1764 %ifdef ASM_CALL64_GCC
1765 mov cl, A1_8
1766 %1 word [A0], cl
1767 %else
1768 xchg A1, A0
1769 %1 word [A1], cl
1770 %endif
1771 IEM_SAVE_FLAGS A2, %2, %3
1772 EPILOGUE_3_ARGS
1773ENDPROC iemAImpl_ %+ %1 %+ _u16
1774
1775BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1776 PROLOGUE_3_ARGS
1777 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1778 %ifdef ASM_CALL64_GCC
1779 mov cl, A1_8
1780 %1 dword [A0], cl
1781 %else
1782 xchg A1, A0
1783 %1 dword [A1], cl
1784 %endif
1785 IEM_SAVE_FLAGS A2, %2, %3
1786 EPILOGUE_3_ARGS
1787ENDPROC iemAImpl_ %+ %1 %+ _u32
1788
1789 %ifdef RT_ARCH_AMD64
1790BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1791 PROLOGUE_3_ARGS
1792 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1793 %ifdef ASM_CALL64_GCC
1794 mov cl, A1_8
1795 %1 qword [A0], cl
1796 %else
1797 xchg A1, A0
1798 %1 qword [A1], cl
1799 %endif
1800 IEM_SAVE_FLAGS A2, %2, %3
1801 EPILOGUE_3_ARGS
1802ENDPROC iemAImpl_ %+ %1 %+ _u64
1803 %endif ; RT_ARCH_AMD64
1804
1805%endmacro
1806
1807IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1808IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1809IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1810IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1811IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1812IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1813IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1814
1815
1816;;
1817; Macro for implementing a double precision shift operation.
1818;
1819; This will generate code for the 16, 32 and 64 bit accesses, except on
1820; 32-bit system where the 64-bit accesses requires hand coding.
1821;
1822; The functions takes the destination operand (r/m) in A0, the source (reg) in
1823; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1824;
1825; @param 1 The instruction mnemonic.
1826; @param 2 The modified flags.
1827; @param 3 The undefined flags.
1828;
1829; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1830;
1831; @note the _intel and _amd variants are implemented in C.
1832;
1833%macro IEMIMPL_SHIFT_DBL_OP 3
1834BEGINCODE
1835BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1836 PROLOGUE_4_ARGS
1837 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1838 %ifdef ASM_CALL64_GCC
1839 xchg A3, A2
1840 %1 [A0], A1_16, cl
1841 xchg A3, A2
1842 %else
1843 xchg A0, A2
1844 %1 [A2], A1_16, cl
1845 %endif
1846 IEM_SAVE_FLAGS A3, %2, %3
1847 EPILOGUE_4_ARGS
1848ENDPROC iemAImpl_ %+ %1 %+ _u16
1849
1850BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1851 PROLOGUE_4_ARGS
1852 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1853 %ifdef ASM_CALL64_GCC
1854 xchg A3, A2
1855 %1 [A0], A1_32, cl
1856 xchg A3, A2
1857 %else
1858 xchg A0, A2
1859 %1 [A2], A1_32, cl
1860 %endif
1861 IEM_SAVE_FLAGS A3, %2, %3
1862 EPILOGUE_4_ARGS
1863ENDPROC iemAImpl_ %+ %1 %+ _u32
1864
1865 %ifdef RT_ARCH_AMD64
1866BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1867 PROLOGUE_4_ARGS
1868 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1869 %ifdef ASM_CALL64_GCC
1870 xchg A3, A2
1871 %1 [A0], A1, cl
1872 xchg A3, A2
1873 %else
1874 xchg A0, A2
1875 %1 [A2], A1, cl
1876 %endif
1877 IEM_SAVE_FLAGS A3, %2, %3
1878 EPILOGUE_4_ARGS_EX 12
1879ENDPROC iemAImpl_ %+ %1 %+ _u64
1880 %endif ; RT_ARCH_AMD64
1881
1882%endmacro
1883
1884IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1885IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1886
1887
1888;;
1889; Macro for implementing a multiplication operations.
1890;
1891; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1892; 32-bit system where the 64-bit accesses requires hand coding.
1893;
1894; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1895; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1896; pointer to eflags in A3.
1897;
1898; The functions all return 0 so the caller can be used for div/idiv as well as
1899; for the mul/imul implementation.
1900;
1901; @param 1 The instruction mnemonic.
1902; @param 2 The modified flags.
1903; @param 3 The undefined flags.
1904; @param 4 Name suffix.
1905; @param 5 EFLAGS behaviour: 0 for native, 1 for intel and 2 for AMD.
1906;
1907; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1908;
1909%macro IEMIMPL_MUL_OP 5
1910BEGINCODE
1911BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %4, 12
1912 PROLOGUE_3_ARGS
1913 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1914 mov al, [A0]
1915 %1 A1_8
1916 mov [A0], ax
1917 %if %5 != 1
1918 IEM_SAVE_FLAGS A2, %2, %3
1919 %else
1920 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %2, X86_EFL_AF | X86_EFL_ZF, ax, 8, xAX
1921 %endif
1922 xor eax, eax
1923 EPILOGUE_3_ARGS
1924ENDPROC iemAImpl_ %+ %1 %+ _u8 %+ %4
1925
1926BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %4, 16
1927 PROLOGUE_4_ARGS
1928 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1929 mov ax, [A0]
1930 %ifdef ASM_CALL64_GCC
1931 %1 A2_16
1932 mov [A0], ax
1933 mov [A1], dx
1934 %else
1935 mov T1, A1
1936 %1 A2_16
1937 mov [A0], ax
1938 mov [T1], dx
1939 %endif
1940 %if %5 != 1
1941 IEM_SAVE_FLAGS A3, %2, %3
1942 %else
1943 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, ax, 16, xAX
1944 %endif
1945 xor eax, eax
1946 EPILOGUE_4_ARGS
1947ENDPROC iemAImpl_ %+ %1 %+ _u16 %+ %4
1948
1949BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %4, 16
1950 PROLOGUE_4_ARGS
1951 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1952 mov eax, [A0]
1953 %ifdef ASM_CALL64_GCC
1954 %1 A2_32
1955 mov [A0], eax
1956 mov [A1], edx
1957 %else
1958 mov T1, A1
1959 %1 A2_32
1960 mov [A0], eax
1961 mov [T1], edx
1962 %endif
1963 %if %5 != 1
1964 IEM_SAVE_FLAGS A3, %2, %3
1965 %else
1966 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, eax, 32, xAX
1967 %endif
1968 xor eax, eax
1969 EPILOGUE_4_ARGS
1970ENDPROC iemAImpl_ %+ %1 %+ _u32 %+ %4
1971
1972 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1973BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %4, 20
1974 PROLOGUE_4_ARGS
1975 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1976 mov rax, [A0]
1977 %ifdef ASM_CALL64_GCC
1978 %1 A2
1979 mov [A0], rax
1980 mov [A1], rdx
1981 %else
1982 mov T1, A1
1983 %1 A2
1984 mov [A0], rax
1985 mov [T1], rdx
1986 %endif
1987 %if %5 != 1
1988 IEM_SAVE_FLAGS A3, %2, %3
1989 %else
1990 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, rax, 64, xAX
1991 %endif
1992 xor eax, eax
1993 EPILOGUE_4_ARGS_EX 12
1994ENDPROC iemAImpl_ %+ %1 %+ _u64 %+ %4
1995 %endif ; !RT_ARCH_AMD64
1996
1997%endmacro
1998
1999IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), , 0
2000IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), 0, _intel, 1
2001IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), 0, _amd, 2
2002IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), , 0
2003IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), 0, _intel, 1
2004IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), 0, _amd, 2
2005
2006
BEGINCODE
;;
; Worker function for negating a 32-bit number pair in T1:T0 (T1:T0 = -T1:T0).
;
; Works by parking the old value in two zero-initialized stack slots and
; subtracting it from zero with borrow propagation (sub/sbb), so no extra
; registers are needed.
;
; @uses None (T0,T1) - EFLAGS are clobbered by the sub/sbb.
BEGINPROC iemAImpl_negate_T0_T1_u32
        push    0                       ; [xSP + xCB] = 0 (high slot after xchgs)
        push    0                       ; [xSP]       = 0 (low slot after xchgs)
        xchg    T0_32, [xSP]            ; stash old T0, T0 = 0
        xchg    T1_32, [xSP + xCB]      ; stash old T1, T1 = 0
        sub     T0_32, [xSP]            ; T0 = 0 - old T0
        sbb     T1_32, [xSP + xCB]      ; T1 = 0 - old T1 - borrow
        add     xSP, xCB*2
        ret
ENDPROC iemAImpl_negate_T0_T1_u32
2021
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating a 64-bit number pair in T1:T0 (T1:T0 = -T1:T0).
;
; Same stack-slot technique as iemAImpl_negate_T0_T1_u32, but on the full
; 64-bit registers (AMD64 only).
;
; @uses None (T0,T1) - EFLAGS are clobbered by the sub/sbb.
BEGINPROC iemAImpl_negate_T0_T1_u64
        push    0                       ; [xSP + xCB] = 0 (high slot after xchgs)
        push    0                       ; [xSP]       = 0 (low slot after xchgs)
        xchg    T0, [xSP]               ; stash old T0, T0 = 0
        xchg    T1, [xSP + xCB]         ; stash old T1, T1 = 0
        sub     T0, [xSP]               ; T0 = 0 - old T0
        sbb     T1, [xSP + xCB]         ; T1 = 0 - old T1 - borrow
        add     xSP, xCB*2
        ret
ENDPROC iemAImpl_negate_T0_T1_u64
%endif
2037
2038
2039;;
2040; Macro for implementing a division operations.
2041;
2042; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
2043; 32-bit system where the 64-bit accesses requires hand coding.
2044;
2045; The 8-bit function only operates on AX, so it takes no DX pointer. The other
2046; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
2047; pointer to eflags in A3.
2048;
2049; The functions all return 0 on success and -1 if a divide error should be
2050; raised by the caller.
2051;
2052; @param 1 The instruction mnemonic.
2053; @param 2 The modified flags.
2054; @param 3 The undefined flags.
2055; @param 4 1 if signed, 0 if unsigned.
2056; @param 5 Function suffix.
2057; @param 6 EFLAGS variation: 0 for native, 1 for intel (ignored),
2058; 2 for AMD (set AF, clear PF, ZF and SF).
2059;
2060; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
2061;
2062%macro IEMIMPL_DIV_OP 6
2063BEGINCODE
2064BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %5, 12
2065 PROLOGUE_3_ARGS
2066
2067 ; div by chainsaw check.
2068 test A1_8, A1_8
2069 jz .div_zero
2070
2071 ; Overflow check - unsigned division is simple to verify, haven't
2072 ; found a simple way to check signed division yet unfortunately.
2073 %if %4 == 0
2074 cmp [A0 + 1], A1_8
2075 jae .div_overflow
2076 %else
2077 mov T0_16, [A0] ; T0 = dividend
2078 mov T1, A1 ; T1 = saved divisor (because of missing T1_8 in 32-bit)
2079 test A1_8, A1_8
2080 js .divisor_negative
2081 test T0_16, T0_16
2082 jns .both_positive
2083 neg T0_16
2084.one_of_each: ; OK range is 2^(result-with - 1) + (divisor - 1).
2085 push T0 ; Start off like unsigned below.
2086 shr T0_16, 7
2087 cmp T0_8, A1_8
2088 pop T0
2089 jb .div_no_overflow
2090 ja .div_overflow
2091 and T0_8, 0x7f ; Special case for covering (divisor - 1).
2092 cmp T0_8, A1_8
2093 jae .div_overflow
2094 jmp .div_no_overflow
2095
2096.divisor_negative:
2097 neg A1_8
2098 test T0_16, T0_16
2099 jns .one_of_each
2100 neg T0_16
2101.both_positive: ; Same as unsigned shifted by sign indicator bit.
2102 shr T0_16, 7
2103 cmp T0_8, A1_8
2104 jae .div_overflow
2105.div_no_overflow:
2106 mov A1, T1 ; restore divisor
2107 %endif
2108
2109 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
2110 mov ax, [A0]
2111 %1 A1_8
2112 mov [A0], ax
2113 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
2114 IEM_ADJUST_FLAGS A2, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
2115 %else
2116 IEM_SAVE_FLAGS A2, %2, %3
2117 %endif
2118 xor eax, eax
2119
2120.return:
2121 EPILOGUE_3_ARGS
2122
2123.div_zero:
2124.div_overflow:
2125 mov eax, -1
2126 jmp .return
2127ENDPROC iemAImpl_ %+ %1 %+ _u8 %+ %5
2128
2129BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %5, 16
2130 PROLOGUE_4_ARGS
2131
2132 ; div by chainsaw check.
2133 test A2_16, A2_16
2134 jz .div_zero
2135
2136 ; Overflow check - unsigned division is simple to verify, haven't
2137 ; found a simple way to check signed division yet unfortunately.
2138 %if %4 == 0
2139 cmp [A1], A2_16
2140 jae .div_overflow
2141 %else
2142 mov T0_16, [A1]
2143 shl T0_32, 16
2144 mov T0_16, [A0] ; T0 = dividend
2145 mov T1, A2 ; T1 = divisor
2146 test T1_16, T1_16
2147 js .divisor_negative
2148 test T0_32, T0_32
2149 jns .both_positive
2150 neg T0_32
2151.one_of_each: ; OK range is 2^(result-with - 1) + (divisor - 1).
2152 push T0 ; Start off like unsigned below.
2153 shr T0_32, 15
2154 cmp T0_16, T1_16
2155 pop T0
2156 jb .div_no_overflow
2157 ja .div_overflow
2158 and T0_16, 0x7fff ; Special case for covering (divisor - 1).
2159 cmp T0_16, T1_16
2160 jae .div_overflow
2161 jmp .div_no_overflow
2162
2163.divisor_negative:
2164 neg T1_16
2165 test T0_32, T0_32
2166 jns .one_of_each
2167 neg T0_32
2168.both_positive: ; Same as unsigned shifted by sign indicator bit.
2169 shr T0_32, 15
2170 cmp T0_16, T1_16
2171 jae .div_overflow
2172.div_no_overflow:
2173 %endif
2174
2175 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
2176 %ifdef ASM_CALL64_GCC
2177 mov T1, A2
2178 mov ax, [A0]
2179 mov dx, [A1]
2180 %1 T1_16
2181 mov [A0], ax
2182 mov [A1], dx
2183 %else
2184 mov T1, A1
2185 mov ax, [A0]
2186 mov dx, [T1]
2187 %1 A2_16
2188 mov [A0], ax
2189 mov [T1], dx
2190 %endif
2191 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
2192 IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
2193 %else
2194 IEM_SAVE_FLAGS A3, %2, %3
2195 %endif
2196 xor eax, eax
2197
2198.return:
2199 EPILOGUE_4_ARGS
2200
2201.div_zero:
2202.div_overflow:
2203 mov eax, -1
2204 jmp .return
2205ENDPROC iemAImpl_ %+ %1 %+ _u16 %+ %5
2206
2207BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %5, 16
2208 PROLOGUE_4_ARGS
2209
2210 ; div by chainsaw check.
2211 test A2_32, A2_32
2212 jz .div_zero
2213
2214 ; Overflow check - unsigned division is simple to verify, haven't
2215 ; found a simple way to check signed division yet unfortunately.
2216 %if %4 == 0
2217 cmp [A1], A2_32
2218 jae .div_overflow
2219 %else
2220 push A2 ; save A2 so we modify it (we out of regs on x86).
2221 mov T0_32, [A0] ; T0 = dividend low
2222 mov T1_32, [A1] ; T1 = dividend high
2223 test A2_32, A2_32
2224 js .divisor_negative
2225 test T1_32, T1_32
2226 jns .both_positive
2227 call NAME(iemAImpl_negate_T0_T1_u32)
2228.one_of_each: ; OK range is 2^(result-with - 1) + (divisor - 1).
2229 push T0 ; Start off like unsigned below.
2230 shl T1_32, 1
2231 shr T0_32, 31
2232 or T1_32, T0_32
2233 cmp T1_32, A2_32
2234 pop T0
2235 jb .div_no_overflow
2236 ja .div_overflow
2237 and T0_32, 0x7fffffff ; Special case for covering (divisor - 1).
2238 cmp T0_32, A2_32
2239 jae .div_overflow
2240 jmp .div_no_overflow
2241
2242.divisor_negative:
2243 neg A2_32
2244 test T1_32, T1_32
2245 jns .one_of_each
2246 call NAME(iemAImpl_negate_T0_T1_u32)
2247.both_positive: ; Same as unsigned shifted by sign indicator bit.
2248 shl T1_32, 1
2249 shr T0_32, 31
2250 or T1_32, T0_32
2251 cmp T1_32, A2_32
2252 jae .div_overflow
2253.div_no_overflow:
2254 pop A2
2255 %endif
2256
2257 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
2258 mov eax, [A0]
2259 %ifdef ASM_CALL64_GCC
2260 mov T1, A2
2261 mov eax, [A0]
2262 mov edx, [A1]
2263 %1 T1_32
2264 mov [A0], eax
2265 mov [A1], edx
2266 %else
2267 mov T1, A1
2268 mov eax, [A0]
2269 mov edx, [T1]
2270 %1 A2_32
2271 mov [A0], eax
2272 mov [T1], edx
2273 %endif
2274 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
2275 IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
2276 %else
2277 IEM_SAVE_FLAGS A3, %2, %3
2278 %endif
2279 xor eax, eax
2280
2281.return:
2282 EPILOGUE_4_ARGS
2283
2284.div_overflow:
2285 %if %4 != 0
2286 pop A2
2287 %endif
2288.div_zero:
2289 mov eax, -1
2290 jmp .return
2291ENDPROC iemAImpl_ %+ %1 %+ _u32 %+ %5
2292
2293 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
2294BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %5, 20
2295 PROLOGUE_4_ARGS
2296
2297 test A2, A2
2298 jz .div_zero
2299 %if %4 == 0
2300 cmp [A1], A2
2301 jae .div_overflow
2302 %else
2303 push A2 ; save A2 so we modify it (we out of regs on x86).
2304 mov T0, [A0] ; T0 = dividend low
2305 mov T1, [A1] ; T1 = dividend high
2306 test A2, A2
2307 js .divisor_negative
2308 test T1, T1
2309 jns .both_positive
2310 call NAME(iemAImpl_negate_T0_T1_u64)
2311.one_of_each: ; OK range is 2^(result-with - 1) + (divisor - 1).
2312 push T0 ; Start off like unsigned below.
2313 shl T1, 1
2314 shr T0, 63
2315 or T1, T0
2316 cmp T1, A2
2317 pop T0
2318 jb .div_no_overflow
2319 ja .div_overflow
2320 mov T1, 0x7fffffffffffffff
2321 and T0, T1 ; Special case for covering (divisor - 1).
2322 cmp T0, A2
2323 jae .div_overflow
2324 jmp .div_no_overflow
2325
2326.divisor_negative:
2327 neg A2
2328 test T1, T1
2329 jns .one_of_each
2330 call NAME(iemAImpl_negate_T0_T1_u64)
2331.both_positive: ; Same as unsigned shifted by sign indicator bit.
2332 shl T1, 1
2333 shr T0, 63
2334 or T1, T0
2335 cmp T1, A2
2336 jae .div_overflow
2337.div_no_overflow:
2338 pop A2
2339 %endif
2340
2341 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
2342 mov rax, [A0]
2343 %ifdef ASM_CALL64_GCC
2344 mov T1, A2
2345 mov rax, [A0]
2346 mov rdx, [A1]
2347 %1 T1
2348 mov [A0], rax
2349 mov [A1], rdx
2350 %else
2351 mov T1, A1
2352 mov rax, [A0]
2353 mov rdx, [T1]
2354 %1 A2
2355 mov [A0], rax
2356 mov [T1], rdx
2357 %endif
2358 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
2359 IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
2360 %else
2361 IEM_SAVE_FLAGS A3, %2, %3
2362 %endif
2363 xor eax, eax
2364
2365.return:
2366 EPILOGUE_4_ARGS_EX 12
2367
2368.div_overflow:
2369 %if %4 != 0
2370 pop A2
2371 %endif
2372.div_zero:
2373 mov eax, -1
2374 jmp .return
2375ENDPROC iemAImpl_ %+ %1 %+ _u64 %+ %5
2376 %endif ; !RT_ARCH_AMD64
2377
2378%endmacro
2379
; Instantiations of IEMIMPL_DIV_OP.
; Parameters as used by the macro body: %1 = instruction (div/idiv);
; %2/%3 = EFLAGS passed to IEM_MAYBE_LOAD_FLAGS/IEM_SAVE_FLAGS (modified/undefined sets);
; %4 = non-zero for signed division (enables the signed overflow pre-checks);
; %5 = symbol name suffix; %6 = vendor flag behavior (0 = use real flags,
; 1 = Intel, 2 = AMD - sets AF and clears PF/ZF/SF via IEM_ADJUST_FLAGS).
IEMIMPL_DIV_OP div, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, , 0
IEMIMPL_DIV_OP div, 0, 0, 0, _intel, 1
IEMIMPL_DIV_OP div, 0, 0, 0, _amd, 2
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1, , 0
IEMIMPL_DIV_OP idiv, 0, 0, 1, _intel, 1
IEMIMPL_DIV_OP idiv, 0, 0, 1, _amd, 2
2386
2387
2388;;
2389; Macro for implementing memory fence operation.
2390;
2391; No return value, no operands or anything.
2392;
2393; @param 1 The instruction.
2394;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1                              ; the fence instruction itself (lfence/sfence/mfence)
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEM_FENCE lfence
IEMIMPL_MEM_FENCE sfence
IEMIMPL_MEM_FENCE mfence
2406
2407;;
2408; Alternative for non-SSE2 host.
2409;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        push    xAX                     ; make a stack slot holding the current xAX value
        xchg    xAX, [xSP]              ; xchg with memory is implicitly LOCKed => serializing barrier; also restores xAX
        add     xSP, xCB                ; drop the scratch stack slot again
        ret
ENDPROC iemAImpl_alt_mem_fence
2416
2417
2418;;
2419; Initialize the FPU for the actual instruction being emulated, this means
2420; loading parts of the guest's control word and status word.
2421;
2422; @uses 24 bytes of stack. T0, T1
2423; @param 1 Expression giving the address of the FXSTATE of the guest.
2424;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]                   ; dump the current (host) FPU environment so we can patch it in place

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK ; only exception masks, precision and rounding from the guest
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK      ; take the guest condition code bits ...
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK    ; ... merged with the actual (host) TOP value
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]                   ; activate the merged environment
%endmacro
2443
2444
2445;;
2446; Initialize the FPU for the actual instruction being emulated, this means
2447; loading parts of the guest's control word, status word, and update the
2448; tag word for the top register if it's empty.
2449;
2450; ASSUMES actual TOP=7
2451;
2452; @uses 24 bytes of stack. T0, T1
2453; @param 1 Expression giving the address of the FXSTATE of the guest.
2454;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW_AND_FTW_0 1
        fnstenv [xSP]                   ; dump the current (host) FPU environment so we can patch it in place

        ; FCW - for exception, precision and rounding control.
        movzx   T0_32, word [%1 + X86FXSTATE.FCW]
        and     T0_32, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK ; only exception masks, precision and rounding from the guest
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1_32, word [%1 + X86FXSTATE.FSW]
        and     T1_32, X86_FSW_C_MASK   ; take the guest condition code bits ...
        movzx   T0_32, word [xSP + X86FSTENV32P.FSW]
        and     T0_32, X86_FSW_TOP_MASK ; ... merged with the actual (host) TOP value
        or      T0_32, T1_32
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        ; FTW - Only for ST0 (in/out).
        movzx   T1_32, word [%1 + X86FXSTATE.FSW]
        shr     T1_32, X86_FSW_TOP_SHIFT ; T1 = guest TOP, i.e. the FTW bit index of the guest ST0
        and     T1_32, X86_FSW_TOP_SMASK
        bt      [%1 + X86FXSTATE.FTW], T1_16 ; Empty if FTW bit is clear. Fixed register order.
        jc      %%st0_not_empty
        or      word [xSP + X86FSTENV32P.FTW], 0c000h ; TOP=7, so set TAG(7)=3
%%st0_not_empty:

        fldenv  [xSP]                   ; activate the merged environment
%endmacro
2482
2483
2484;;
2485; Need to move this as well somewhere better?
2486;
struc IEMFPURESULT
    .r80Result resw 5                   ; 80-bit (tword) floating point result value
    .FSW       resw 1                   ; output FPU status word
endstruc
2491
2492
2493;;
2494; Need to move this as well somewhere better?
2495;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; first 80-bit (tword) floating point result
    .FSW        resw 1                  ; output FPU status word
    .r80Result2 resw 5                  ; second 80-bit (tword) floating point result
endstruc
2501
2502
2503;
2504;---------------------- 16-bit signed integer operations ----------------------
2505;
2506
2507
2508;;
2509; Converts a 16-bit floating point value to a 80-bit one (fpu register).
2510;
2511; @param A0 FPU context (fxsave).
2512; @param A1 Pointer to a IEMFPURESULT for the output.
2513; @param A2 Pointer to the 16-bit floating point value to convert.
2514;
BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i16, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fild    word [A2]               ; load/convert the 16-bit signed integer

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_r80_from_i16
2531
2532
2533;;
2534; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
2535;
2536; @param A0 FPU context (fxsave).
2537; @param A1 Where to return the output FSW.
2538; @param A2 Where to store the 16-bit signed integer value.
2539; @param A3 Pointer to the 80-bit value.
2540;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW (rounding control!) + safe FSW bits
        fistp   word [A2]               ; store (and pop) as 16-bit signed integer

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
2556
2557
2558;;
2559; Store a 80-bit floating point value (register) as a 16-bit signed integer
2560; (memory) with truncation.
2561;
2562; @param A0 FPU context (fxsave).
2563; @param A1 Where to return the output FSW.
2564; @param A2 Where to store the 16-bit signed integer value.
2565; @param A3 Pointer to the 80-bit value.
2566;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        fisttp  word [A2]               ; store (and pop) as 16-bit signed integer, always truncating

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
2582
2583
2584;;
2585; FPU instruction working on one 80-bit and one 16-bit signed integer value.
2586;
2587; @param 1 The instruction
2588;
2589; @param A0 FPU context (fxsave).
2590; @param A1 Pointer to a IEMFPURESULT for the output.
2591; @param A2 Pointer to the 80-bit value.
2592; @param A3 Pointer to the 16-bit value.
2593;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      word [A3]               ; ST0 <op>= 16-bit integer memory operand

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
2620
2621
2622;;
2623; FPU instruction working on one 80-bit and one 16-bit signed integer value,
2624; only returning FSW.
2625;
2626; @param 1 The instruction
2627;
2628; @param A0 FPU context (fxsave).
2629; @param A1 Where to store the output FSW.
2630; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 16-bit value.
2632;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      word [A3]               ; compare ST0 against the 16-bit integer memory operand

        fnstsw  word [A1]               ; only the status word is returned

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
2652
2653
2654
2655;
2656;---------------------- 32-bit signed integer operations ----------------------
2657;
2658
2659
2660;;
2661; Converts a 32-bit floating point value to a 80-bit one (fpu register).
2662;
2663; @param A0 FPU context (fxsave).
2664; @param A1 Pointer to a IEMFPURESULT for the output.
2665; @param A2 Pointer to the 32-bit floating point value to convert.
2666;
BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i32, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fild    dword [A2]              ; load/convert the 32-bit signed integer

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_r80_from_i32
2683
2684
2685;;
2686; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
2687;
2688; @param A0 FPU context (fxsave).
2689; @param A1 Where to return the output FSW.
2690; @param A2 Where to store the 32-bit signed integer value.
2691; @param A3 Pointer to the 80-bit value.
2692;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW (rounding control!) + safe FSW bits
        fistp   dword [A2]              ; store (and pop) as 32-bit signed integer

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
2708
2709
2710;;
2711; Store a 80-bit floating point value (register) as a 32-bit signed integer
2712; (memory) with truncation.
2713;
2714; @param A0 FPU context (fxsave).
2715; @param A1 Where to return the output FSW.
2716; @param A2 Where to store the 32-bit signed integer value.
2717; @param A3 Pointer to the 80-bit value.
2718;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        fisttp  dword [A2]              ; store (and pop) as 32-bit signed integer, always truncating

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
2734
2735
2736;;
2737; FPU instruction working on one 80-bit and one 32-bit signed integer value.
2738;
2739; @param 1 The instruction
2740;
2741; @param A0 FPU context (fxsave).
2742; @param A1 Pointer to a IEMFPURESULT for the output.
2743; @param A2 Pointer to the 80-bit value.
2744; @param A3 Pointer to the 32-bit value.
2745;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      dword [A3]              ; ST0 <op>= 32-bit integer memory operand

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2772
2773
2774;;
2775; FPU instruction working on one 80-bit and one 32-bit signed integer value,
2776; only returning FSW.
2777;
2778; @param 1 The instruction
2779;
2780; @param A0 FPU context (fxsave).
2781; @param A1 Where to store the output FSW.
2782; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 32-bit value.
2784;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      dword [A3]              ; compare ST0 against the 32-bit integer memory operand

        fnstsw  word [A1]               ; only the status word is returned

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2804
2805
2806
2807;
2808;---------------------- 64-bit signed integer operations ----------------------
2809;
2810
2811
2812;;
2813; Converts a 64-bit floating point value to a 80-bit one (fpu register).
2814;
2815; @param A0 FPU context (fxsave).
2816; @param A1 Pointer to a IEMFPURESULT for the output.
2817; @param A2 Pointer to the 64-bit floating point value to convert.
2818;
BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i64, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fild    qword [A2]              ; load/convert the 64-bit signed integer

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_r80_from_i64
2835
2836
2837;;
2838; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
2839;
2840; @param A0 FPU context (fxsave).
2841; @param A1 Where to return the output FSW.
2842; @param A2 Where to store the 64-bit signed integer value.
2843; @param A3 Pointer to the 80-bit value.
2844;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW (rounding control!) + safe FSW bits
        fistp   qword [A2]              ; store (and pop) as 64-bit signed integer

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2860
2861
2862;;
2863; Store a 80-bit floating point value (register) as a 64-bit signed integer
2864; (memory) with truncation.
2865;
2866; @param A0 FPU context (fxsave).
2867; @param A1 Where to return the output FSW.
2868; @param A2 Where to store the 64-bit signed integer value.
2869; @param A3 Pointer to the 80-bit value.
2870;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        fisttp  qword [A2]              ; store (and pop) as 64-bit signed integer, always truncating

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2886
2887
2888
2889;
2890;---------------------- 32-bit floating point operations ----------------------
2891;
2892
2893;;
2894; Converts a 32-bit floating point value to a 80-bit one (fpu register).
2895;
2896; @param A0 FPU context (fxsave).
2897; @param A1 Pointer to a IEMFPURESULT for the output.
2898; @param A2 Pointer to the 32-bit floating point value to convert.
2899;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r32, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fld     dword [A2]              ; load/convert the 32-bit floating point value

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r32
2916
2917
2918;;
2919; Store a 80-bit floating point value (register) as a 32-bit one (memory).
2920;
2921; @param A0 FPU context (fxsave).
2922; @param A1 Where to return the output FSW.
2923; @param A2 Where to store the 32-bit value.
2924; @param A3 Pointer to the 80-bit value.
2925;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW (rounding/precision!) + safe FSW bits
        fst     dword [A2]              ; store as 32-bit floating point value

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2941
2942
2943;;
2944; FPU instruction working on one 80-bit and one 32-bit floating point value.
2945;
2946; @param 1 The instruction
2947;
2948; @param A0 FPU context (fxsave).
2949; @param A1 Pointer to a IEMFPURESULT for the output.
2950; @param A2 Pointer to the 80-bit value.
2951; @param A3 Pointer to the 32-bit value.
2952;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      dword [A3]              ; ST0 <op>= 32-bit floating point memory operand

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2979
2980
2981;;
2982; FPU instruction working on one 80-bit and one 32-bit floating point value,
2983; only returning FSW.
2984;
2985; @param 1 The instruction
2986;
2987; @param A0 FPU context (fxsave).
2988; @param A1 Where to store the output FSW.
2989; @param A2 Pointer to the 80-bit value.
; @param A3 Pointer to the 32-bit value.
2991;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      dword [A3]              ; compare ST0 against the 32-bit floating point memory operand

        fnstsw  word [A1]               ; only the status word is returned

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
3011
3012
3013
3014;
3015;---------------------- 64-bit floating point operations ----------------------
3016;
3017
3018;;
3019; Converts a 64-bit floating point value to a 80-bit one (fpu register).
3020;
3021; @param A0 FPU context (fxsave).
3022; @param A1 Pointer to a IEMFPURESULT for the output.
3023; @param A2 Pointer to the 64-bit floating point value to convert.
3024;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r64, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state (was missing; all sibling loaders do this first)
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fld     qword [A2]              ; load/convert the 64-bit floating point value

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r64
3040
3041
3042;;
3043; Store a 80-bit floating point value (register) as a 64-bit one (memory).
3044;
3045; @param A0 FPU context (fxsave).
3046; @param A1 Where to return the output FSW.
3047; @param A2 Where to store the 64-bit value.
3048; @param A3 Pointer to the 80-bit value.
3049;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit source value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW (rounding/precision!) + safe FSW bits
        fst     qword [A2]              ; store as 64-bit floating point value

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
3065
3066
3067;;
3068; FPU instruction working on one 80-bit and one 64-bit floating point value.
3069;
3070; @param 1 The instruction
3071;
3072; @param A0 FPU context (fxsave).
3073; @param A1 Pointer to a IEMFPURESULT for the output.
3074; @param A2 Pointer to the 80-bit value.
3075; @param A3 Pointer to the 64-bit value.
3076;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      qword [A3]              ; ST0 <op>= 64-bit floating point memory operand

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
3103
3104;;
3105; FPU instruction working on one 80-bit and one 64-bit floating point value,
3106; only returning FSW.
3107;
3108; @param 1 The instruction
3109;
3110; @param A0 FPU context (fxsave).
3111; @param A1 Where to store the output FSW.
3112; @param A2 Pointer to the 80-bit value.
3113; @param A3 Pointer to the 64-bit value.
3114;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      qword [A3]              ; compare ST0 against the 64-bit floating point memory operand

        fnstsw  word [A1]               ; only the status word is returned

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
3134
3135
3136
3137;
3138;---------------------- 80-bit floating point operations ----------------------
3139;
3140
3141;;
3142; Loads a 80-bit floating point register value from memory.
3143;
3144; @param A0 FPU context (fxsave).
3145; @param A1 Pointer to a IEMFPURESULT for the output.
3146; @param A2 Pointer to the 80-bit floating point value to load.
3147;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fld     tword [A2]              ; load the 80-bit value (no conversion, but may raise #IS etc.)

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
3164
3165
3166;;
3167; Store a 80-bit floating point register to memory
3168;
3169; @param A0 FPU context (fxsave).
3170; @param A1 Where to return the output FSW.
3171; @param A2 Where to store the 80-bit value.
3172; @param A3 Pointer to the 80-bit register value.
3173;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit register value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        fstp    tword [A2]              ; store (and pop) the full 80-bit value

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
3189
3190
3191;;
3192; Loads an 80-bit floating point register value in BCD format from memory.
3193;
3194; @param A0 FPU context (fxsave).
3195; @param A1 Pointer to a IEMFPURESULT for the output.
3196; @param A2 Pointer to the 80-bit BCD value to load.
3197;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_d80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe guest FSW bits
        fbld    tword [A2]              ; load/convert the 80-bit packed BCD value

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_d80
3214
3215
3216;;
3217; Store a 80-bit floating point register to memory as BCD
3218;
3219; @param A0 FPU context (fxsave).
3220; @param A1 Where to return the output FSW.
3221; @param A2 Where to store the 80-bit BCD value.
3222; @param A3 Pointer to the 80-bit register value.
3223;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_d80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; load the 80-bit register value into ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        fbstp   tword [A2]              ; store (and pop) as 80-bit packed BCD

        fnstsw  word [A1]               ; return the resulting status word

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_d80
3239
3240
3241;;
3242; FPU instruction working on two 80-bit floating point values.
3243;
3244; @param 1 The instruction
3245;
3246; @param A0 FPU context (fxsave).
3247; @param A1 Pointer to a IEMFPURESULT for the output.
3248; @param A2 Pointer to the first 80-bit value (ST0)
3249; @param A3 Pointer to the second 80-bit value (STn).
3250;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; second operand -> ST1 (after the next fld)
        fld     tword [A2]              ; first operand -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      %2                      ; %2 is the operand list, e.g. {st0, st1} or {} for implicit ops

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; result is taken from ST0

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem, {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
3281
3282
3283;;
3284; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
3285; storing the result in ST1 and popping the stack.
3286;
3287; @param 1 The instruction
3288;
3289; @param A0 FPU context (fxsave).
3290; @param A1 Pointer to a IEMFPURESULT for the output.
3291; @param A2 Pointer to the first 80-bit value (ST1).
3292; @param A3 Pointer to the second 80-bit value (ST0).
3293;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; first value -> ends up as ST1
        fld     tword [A3]              ; second value -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1                              ; computes into ST1 and pops; result is then in ST0

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
3318
3319
3320;;
3321; FPU instruction working on two 80-bit floating point values, only
3322; returning FSW.
3323;
3324; @param 1 The instruction
3325;
3326; @param A0 FPU context (fxsave).
3327; @param A1 Pointer to a uint16_t for the resulting FSW.
3328; @param A2 Pointer to the first 80-bit value.
3329; @param A3 Pointer to the second 80-bit value.
3330;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; second value -> ST1 (after the next fld)
        fld     tword [A2]              ; first value -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      st0, st1                ; compare; condition codes land in FSW

        fnstsw  word [A1]               ; only the status word is returned

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
3352
3353
3354;;
3355; FPU instruction working on two 80-bit floating point values,
3356; returning FSW and EFLAGS (eax).
3357;
3358; @param 1 The instruction
3359;
3360; @returns EFLAGS in EAX.
3361; @param A0 FPU context (fxsave).
3362; @param A1 Pointer to a uint16_t for the resulting FSW.
3363; @param A2 Pointer to the first 80-bit value.
3364; @param A3 Pointer to the second 80-bit value.
3365;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A3]              ; second value -> ST1 (after the next fld)
        fld     tword [A2]              ; first value -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1      st1                     ; fcomi/fucomi: compare ST0 with ST1, sets ZF/PF/CF

        fnstsw  word [A1]               ; return the status word ...
        pushf
        pop     xAX                     ; ... and EFLAGS in xAX

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
3389
3390
3391;;
3392; FPU instruction working on one 80-bit floating point value.
3393;
3394; @param 1 The instruction
3395;
3396; @param A0 FPU context (fxsave).
3397; @param A1 Pointer to a IEMFPURESULT for the output.
3398; @param A2 Pointer to the 80-bit value.
3399;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; the operand -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; apply guest FCW + safe FSW bits
        %1                              ; unary op on ST0

        fnstsw  word [A1 + IEMFPURESULT.FSW] ; capture the resulting status word
        fnclex                          ; clear pending exceptions so the fstp below cannot fault
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
3427
3428
3429;;
3430; FPU instruction working on one 80-bit floating point value, only
3431; returning FSW.
3432;
3433; @param 1 The instruction
3434; @param 2 Non-zero to also restore FTW.
3435;
3436; @param A0 FPU context (fxsave).
3437; @param A1 Pointer to a uint16_t for the resulting FSW.
3438; @param A2 Pointer to the 80-bit value.
3439;
%macro IEMIMPL_FPU_R80_FSW 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area for the fnstenv/fldenv done by the loader macro

        fninit                          ; known, empty FPU state
        fld     tword [A2]              ; the operand -> ST0
%if %2 != 0
        ; Also restore the guest's FTW for ST0 so empty-register info is accurate (needed by fxam).
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW_AND_FTW_0 A0
%else
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
%endif
        %1                              ; examine/test ST0; condition codes land in FSW

        fnstsw  word [A1]               ; only the status word is returned

        fninit                          ; leave the FPU clean for the host
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst, 0
IEMIMPL_FPU_R80_FSW fxam, 1 ; No #IS or any other FP exceptions.
3464
3465
3466
;;
; FPU instruction loading a 80-bit floating point constant.
;
; The constant is pushed with the guest FCW active (rounding/precision can
; affect fldl2t & friends), and the FSW + value are stored in the
; IEMFPURESULT structure.
;
; @param 1      The instruction
;
; @param A0     FPU context (fxsave).
; @param A1     Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                ; scratch area; presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW - TODO confirm

        fninit                          ; start from a clean FPU state
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; load the guest FCW (+ a safe FSW)
        %1                              ; push the constant onto the FPU stack (st0)

        fnstsw  word  [A1 + IEMFPURESULT.FSW] ; capture FSW before popping the result
        fnclex                          ; clear pending exceptions so the fstp doesn't trigger them
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave with a clean FPU state
        add     xSP, 20h
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
3501
3502
;;
; FPU instruction working on one 80-bit floating point value, outputing two.
;
; @param 1      The instruction
;
; @param A0     FPU context (fxsave).
; @param A1     Pointer to a IEMFPURESULTTWO for the output.
; @param A2     Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch area; presumably used by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW - TODO confirm

        fninit                          ; start from a clean FPU state
        fld     tword [A2]              ; st0 = the input value
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; load the guest FCW (+ a safe FSW)
        %1                              ; produces two results: st0 and st1

        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW] ; capture FSW before popping
        fnclex                          ; clear pending exceptions so the stores don't trigger them
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2] ; top of stack -> second result...
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1] ; ...then the value below it -> first result

        fninit                          ; leave with a clean FPU state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
3537
3538
3539
3540
3541;---------------------- SSE and MMX Operations ----------------------
3542
; Placeholder prologue/epilogue pairs wrapped around the MMX/SSE/AVX worker
; bodies below.  Currently empty (see the @todo notes), but they give us a
; single place to add host SIMD state saving/restoring should it turn out
; to be needed.

;; @todo what do we need to do for MMX?
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro
%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;; @todo what do we need to do for SSE?
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro
%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro

;; @todo what do we need to do for AVX?
%macro IEMIMPL_AVX_PROLOGUE 0
%endmacro
%macro IEMIMPL_AVX_EPILOGUE 0
%endmacro
3560
3561
;;
; Media instruction working on two full sized registers.
;
; Emits iemAImpl_<insn>_u64 (MMX, optional) and iemAImpl_<insn>_u128 (SSE)
; workers that perform: *A1 = *A1 <insn> *A2.
;
; @param 1      The instruction
; @param 2      Whether there is an MMX variant (1) or not (0).
;
; @param A0     FPU context (fxsave).
; @param A1     Pointer to the first media register size operand (input/output).
; @param A2     Pointer to the second media register size operand (input).
;
%macro IEMIMPL_MEDIA_F2 2
%if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A1]               ; mm1 = destination operand value
        movq    mm0, [A2]               ; mm0 = source operand value
        %1      mm1, mm0                ; mm1 = mm1 <insn> mm0
        movq    [A1], mm1               ; write the result back

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
%endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A1]              ; xmm1 = destination operand value
        movdqu  xmm0, [A2]              ; xmm0 = source operand value
        %1      xmm1, xmm0              ; xmm1 = xmm1 <insn> xmm0
        movdqu  [A1], xmm1              ; write the result back

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F2 pshufb, 1
IEMIMPL_MEDIA_F2 pand, 1
IEMIMPL_MEDIA_F2 pandn, 1
IEMIMPL_MEDIA_F2 por, 1
IEMIMPL_MEDIA_F2 pxor, 1
IEMIMPL_MEDIA_F2 pcmpeqb, 1
IEMIMPL_MEDIA_F2 pcmpeqw, 1
IEMIMPL_MEDIA_F2 pcmpeqd, 1
IEMIMPL_MEDIA_F2 pcmpeqq, 0
IEMIMPL_MEDIA_F2 pcmpgtb, 1
IEMIMPL_MEDIA_F2 pcmpgtw, 1
IEMIMPL_MEDIA_F2 pcmpgtd, 1
IEMIMPL_MEDIA_F2 pcmpgtq, 0
IEMIMPL_MEDIA_F2 paddb, 1
IEMIMPL_MEDIA_F2 paddw, 1
IEMIMPL_MEDIA_F2 paddd, 1
IEMIMPL_MEDIA_F2 paddq, 1
IEMIMPL_MEDIA_F2 psubb, 1
IEMIMPL_MEDIA_F2 psubw, 1
IEMIMPL_MEDIA_F2 psubd, 1
IEMIMPL_MEDIA_F2 psubq, 1
3623
3624
;;
; Media instruction working on one full sized and one half sized register (lower half).
;
; Since the punpckl* instructions only consume the low half of the source,
; the MMX variant does a 32-bit (movd) load and the SSE variant a 64-bit
; (movq) load of the second operand.
;
; @param 1      The instruction
; @param 2      1 if MMX is included, 0 if not.
;
; @param A0     FPU context (fxsave).
; @param A1     Pointer to the first full sized media register operand (input/output).
; @param A2     Pointer to the second half sized media register operand (input).
;
%macro IEMIMPL_MEDIA_F1L1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; load the full destination operand
        movd    mm1, [A2]               ; low 32 bits of the source suffice here
        %1      mm0, mm1
        movq    [A1], mm0               ; write the result back

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; load the full destination operand
        movq    xmm1, [A2]              ; low 64 bits of the source suffice here
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; write the result back

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F1L1 punpcklbw, 1
IEMIMPL_MEDIA_F1L1 punpcklwd, 1
IEMIMPL_MEDIA_F1L1 punpckldq, 1
IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
3669
3670
;;
; Media instruction working on one full sized and one half sized register (high half).
;
; Counterpart to IEMIMPL_MEDIA_F1L1: instructions like punpckh* consume the
; HIGH half of the source, so the second operand must be loaded full-width
; (movq / movdqu) - a half-sized load would leave the consumed half zero.
;
; @param 1      The instruction
; @param 2      1 if MMX is included, 0 if not.
;
; @param A0     FPU context (fxsave).
; @param A1     Pointer to the first full sized media register operand (input/output).
; @param A2     Pointer to the second full sized media register operand, where we
;               will only use the upper half (input).
;
%macro IEMIMPL_MEDIA_F1H1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; load the full destination operand
        movq    mm1, [A2]               ; full load - the instruction uses the high half
        %1      mm0, mm1
        movq    [A1], mm0               ; write the result back

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; load the full destination operand
        movdqu  xmm1, [A2]              ; full load - the instruction uses the high half
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; write the result back

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro
3711
; These must use the high-half macro: IEMIMPL_MEDIA_F1L1 only loads 32 bits
; (MMX, movd) resp. 64 bits (SSE, movq) of the source, but punpckh* reads the
; HIGH half of the source operand, which would then be all zeroes.
IEMIMPL_MEDIA_F1H1 punpckhbw, 1
IEMIMPL_MEDIA_F1H1 punpckhwd, 1
IEMIMPL_MEDIA_F1H1 punpckhdq, 1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
3716
3717
3718;
3719; Shufflers with evil 8-bit immediates.
3720;
3721
;;
; pshufw with its evil 8-bit immediate: dispatches into a 256-entry table of
; 5-byte "pshufw mm0, mm1, imm8; ret" stubs indexed by the immediate.
;
; @param A0     Pointer to the output qword.
; @param A1     Pointer to the input qword.
; @param A2     The 8-bit immediate (0..255).
;
BEGINPROC_FASTCALL iemAImpl_pshufw_u64, 16
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A1]               ; load the source operand
        movq    mm0, mm1                ; paranoia! (pre-seed the result; 'movq mm0, mm0' was a no-op)
        lea     T0, [A2 + A2*4]         ; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]     ; T1 = base of the immediate stub table
        lea     T1, [T1 + T0]           ; T1 = stub for immediate A2
        call    T1
        movq    [A0], mm0               ; store the shuffled result

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                ; 256*5 == 0x500
dw 0xfaff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw_u64
3747
3748
;;
; SSE shuffle with an 8-bit immediate: dispatches into a 256-entry table of
; 6-byte "<insn> xmm0, xmm1, imm8; ret" stubs indexed by the immediate.
;
; @param 1      The instruction (pshufhw, pshuflw or pshufd).
;
; @param A0     Pointer to the output dqword.
; @param A1     Pointer to the input dqword.
; @param A2     The 8-bit immediate (0..255).
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 16
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A1]              ; load the source operand
        movdqu  xmm0, xmm1              ; paranoia! pre-seed the result register
        lea     T1, [.imm0 xWrtRIP]     ; T1 = base of the immediate stub table
        lea     T0, [A2 + A2*2]         ; sizeof(pshufXX+ret) == 6: (A2 * 3) * 2
        lea     T1, [T1 + T0*2]         ; T1 = stub for immediate A2
        call    T1
        movdqu  [A0], xmm0              ; store the shuffled result

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
3780
3781
;;
; AVX 256-bit shuffle with an 8-bit immediate: dispatches into a 256-entry
; table of 6-byte "<insn> ymm0, ymm1, imm8; ret" stubs indexed by the
; immediate.
;
; @param 1      The instruction (vpshufhw, vpshuflw or vpshufd).
;
; @param A0     Pointer to the output qqword.
; @param A1     Pointer to the input qqword.
; @param A2     The 8-bit immediate (0..255).
;
%macro IEMIMPL_MEDIA_AVX_VPSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u256, 16
        PROLOGUE_3_ARGS
        IEMIMPL_AVX_PROLOGUE            ; AVX worker - was using the SSE prologue by mistake

        vmovdqu ymm1, [A1]              ; load the source operand
        vmovdqu ymm0, ymm1              ; paranoia! pre-seed the result register
        lea     T1, [.imm0 xWrtRIP]     ; T1 = base of the immediate stub table
        lea     T0, [A2 + A2*2]         ; sizeof(pshufXX+ret) == 6: (A2 * 3) * 2
        lea     T1, [T1 + T0*2]         ; T1 = stub for immediate A2
        call    T1
        vmovdqu [A0], ymm0              ; store the shuffled result

        IEMIMPL_AVX_EPILOGUE            ; matching AVX epilogue
        EPILOGUE_3_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      ymm0, ymm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1 %+ _u256
%endmacro

IEMIMPL_MEDIA_AVX_VPSHUFXX vpshufhw
IEMIMPL_MEDIA_AVX_VPSHUFXX vpshuflw
IEMIMPL_MEDIA_AVX_VPSHUFXX vpshufd
3813
3814
3815;
3816; Move byte mask.
3817;
3818
;;
; pmovmskb on an MMX register: collects the byte sign bits into a GPR-sized
; mask and stores it as a 64-bit value at *A0.
;
; @param A0     Pointer to the 64-bit destination.
; @param A1     Pointer to the 64-bit source value.
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 8
        PROLOGUE_2_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; fetch the source value
        pmovmskb T0, mm0                ; T0 = mask of byte sign bits
        mov     [A0], T0                ; store the mask
%ifdef RT_ARCH_X86
        mov     dword [A0 + 4], 0       ; 32-bit host: explicitly zero the high dword of the u64 result
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_pmovmskb_u64
3832
;;
; pmovmskb on an XMM register: collects the 16 byte sign bits into a
; GPR-sized mask and stores it as a 64-bit value at *A0.
;
; @param A0     Pointer to the 64-bit destination.
; @param A1     Pointer to the 128-bit source value.
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 8
        PROLOGUE_2_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; fetch the source value
        pmovmskb T0, xmm0               ; T0 = mask of byte sign bits
        mov     [A0], T0                ; store the mask
%ifdef RT_ARCH_X86
        mov     dword [A0 + 4], 0       ; 32-bit host: explicitly zero the high dword of the u64 result
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_pmovmskb_u128
3846
;;
; vpmovmskb on a YMM register: collects the 32 byte sign bits into a
; GPR-sized mask and stores it as a 64-bit value at *A0.
;
; @param A0     Pointer to the 64-bit destination.
; @param A1     Pointer to the 256-bit source value.
;
BEGINPROC_FASTCALL iemAImpl_vpmovmskb_u256, 8
        PROLOGUE_2_ARGS
        IEMIMPL_AVX_PROLOGUE

        vmovdqu ymm0, [A1]              ; fetch the source value
        vpmovmskb T0, ymm0              ; T0 = mask of byte sign bits
        mov     [A0], T0                ; store the mask
%ifdef RT_ARCH_X86
        mov     dword [A0 + 4], 0       ; 32-bit host: explicitly zero the high dword of the u64 result
%endif
        IEMIMPL_AVX_EPILOGUE
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_vpmovmskb_u256
3860
3861
;;
; Media instruction working on two full sized source registers and one destination (AVX).
;
; Emits iemAImpl_<insn>_u128 and iemAImpl_<insn>_u256 workers performing
; *A1 = *A2 <insn> *A3.  (A0 is not currently referenced by the generated
; code.)
;
; @param 1      The instruction
;
; @param A0     Pointer to the extended CPU/FPU state (X86XSAVEAREA).
; @param A1     Pointer to the destination media register size operand (output).
; @param A2     Pointer to the first source media register size operand (input).
; @param A3     Pointer to the second source media register size operand (input).
;
%macro IEMIMPL_MEDIA_F3 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 16
        PROLOGUE_4_ARGS
        IEMIMPL_AVX_PROLOGUE

        vmovdqu xmm0, [A2]              ; first source operand
        vmovdqu xmm1, [A3]              ; second source operand
        %1      xmm0, xmm0, xmm1        ; xmm0 = xmm0 <insn> xmm1
        vmovdqu [A1], xmm0              ; store the result

        IEMIMPL_AVX_EPILOGUE            ; was erroneously IEMIMPL_AVX_PROLOGUE
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u256, 16
        PROLOGUE_4_ARGS
        IEMIMPL_AVX_PROLOGUE            ; was erroneously IEMIMPL_SSE_PROLOGUE

        vmovdqu ymm0, [A2]              ; first source operand
        vmovdqu ymm1, [A3]              ; second source operand
        %1      ymm0, ymm0, ymm1        ; ymm0 = ymm0 <insn> ymm1
        vmovdqu [A1], ymm0              ; store the result

        IEMIMPL_AVX_EPILOGUE            ; was erroneously IEMIMPL_AVX_PROLOGUE
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u256
%endmacro

IEMIMPL_MEDIA_F3 vpshufb
IEMIMPL_MEDIA_F3 vpand
IEMIMPL_MEDIA_F3 vpandn
IEMIMPL_MEDIA_F3 vpor
IEMIMPL_MEDIA_F3 vpxor
IEMIMPL_MEDIA_F3 vpcmpeqb
IEMIMPL_MEDIA_F3 vpcmpeqw
IEMIMPL_MEDIA_F3 vpcmpeqd
IEMIMPL_MEDIA_F3 vpcmpeqq
IEMIMPL_MEDIA_F3 vpcmpgtb
IEMIMPL_MEDIA_F3 vpcmpgtw
IEMIMPL_MEDIA_F3 vpcmpgtd
IEMIMPL_MEDIA_F3 vpcmpgtq
IEMIMPL_MEDIA_F3 vpaddb
IEMIMPL_MEDIA_F3 vpaddw
IEMIMPL_MEDIA_F3 vpaddd
IEMIMPL_MEDIA_F3 vpaddq
IEMIMPL_MEDIA_F3 vpsubb
IEMIMPL_MEDIA_F3 vpsubw
IEMIMPL_MEDIA_F3 vpsubd
IEMIMPL_MEDIA_F3 vpsubq
3921
3922
3923;
3924; The SSE 4.2 crc32
3925;
3926; @param 1 The instruction
3927;
; @param A0            Pointer to the 32-bit CRC accumulator (input/output).
; @param A1            The source operand, sized according to the suffix.
3930;
3931
;;
; Accumulates one byte into the CRC-32C (Castagnoli) checksum at *A0.
;
; @param A0     Pointer to the 32-bit CRC accumulator (input/output).
; @param A1     The 8-bit source value (low byte of the register).
;
BEGINPROC_FASTCALL iemAImpl_crc32_u8, 8
        PROLOGUE_2_ARGS

        mov     T0_32, [A0]             ; fetch the current CRC value
        crc32   T0_32, A1_8             ; fold in the source byte
        mov     [A0], T0_32             ; store the updated CRC

        EPILOGUE_2_ARGS
ENDPROC iemAImpl_crc32_u8
3941
;;
; Accumulates one word into the CRC-32C (Castagnoli) checksum at *A0.
;
; @param A0     Pointer to the 32-bit CRC accumulator (input/output).
; @param A1     The 16-bit source value (low word of the register).
;
BEGINPROC_FASTCALL iemAImpl_crc32_u16, 8
        PROLOGUE_2_ARGS

        mov     T0_32, [A0]             ; fetch the current CRC value
        crc32   T0_32, A1_16            ; fold in the source word
        mov     [A0], T0_32             ; store the updated CRC

        EPILOGUE_2_ARGS
ENDPROC iemAImpl_crc32_u16
3951
;;
; Accumulates one dword into the CRC-32C (Castagnoli) checksum at *A0.
;
; @param A0     Pointer to the 32-bit CRC accumulator (input/output).
; @param A1     The 32-bit source value.
;
BEGINPROC_FASTCALL iemAImpl_crc32_u32, 8
        PROLOGUE_2_ARGS

        mov     T0_32, [A0]             ; fetch the current CRC value
        crc32   T0_32, A1_32            ; fold in the source dword
        mov     [A0], T0_32             ; store the updated CRC

        EPILOGUE_2_ARGS
ENDPROC iemAImpl_crc32_u32
3961
%ifdef RT_ARCH_AMD64
;;
; Accumulates one qword into the CRC-32C (Castagnoli) checksum at *A0.
; AMD64 hosts only (the 64-bit crc32 form doesn't exist on x86).
;
; @param A0     Pointer to the 32-bit CRC accumulator (input/output).
; @param A1     The 64-bit source value.
;
BEGINPROC_FASTCALL iemAImpl_crc32_u64, 8
        PROLOGUE_2_ARGS

        mov     T0_32, [A0]             ; fetch the current CRC (zero-extends into T0)
        crc32   T0, A1                  ; fold in the source qword; result is 32 bits
        mov     [A0], T0_32             ; store the updated CRC

        EPILOGUE_2_ARGS
ENDPROC iemAImpl_crc32_u64
%endif
3973
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette