IEMAllAImpl.asm@ 62478

Last change on this file since 62478 was 62478, checked in by vboxsync, 8 years ago
(C) 2016
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 79.8 KB

Line
1	; $Id: IEMAllAImpl.asm 62478 2016-07-22 18:29:06Z vboxsync $
2	;; @file
3	; IEM - Instruction Implementation in Assembly.
4	;
5
6	; Copyright (C) 2011-2016 Oracle Corporation
7	;
8	; This file is part of VirtualBox Open Source Edition (OSE), as
9	; available from http://www.virtualbox.org. This file is free software;
10	; you can redistribute it and/or modify it under the terms of the GNU
11	; General Public License (GPL) as published by the Free Software
12	; Foundation, in version 2 as it comes in the "COPYING" file of the
13	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15	;
16
17
18	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19	; Header Files ;
20	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21	%include "VBox/asmdefs.mac"
22	%include "VBox/err.mac"
23	%include "iprt/x86.mac"
24
25
26	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27	; Defined Constants And Macros ;
28	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30	;;
31	; RET XX / RET wrapper for fastcall: emits 'ret %1' on 32-bit x86 Windows and a plain 'ret' everywhere else.
32	; @param 1 The operand for 'ret'; only used when both RT_ARCH_X86 and RT_OS_WINDOWS are defined.
33	%macro RET_FASTCALL 1
34	%ifdef RT_ARCH_X86
35	%ifdef RT_OS_WINDOWS
36	ret %1 ; x86 + Windows: the operand is passed through to ret
37	%else
38	ret ; x86, not Windows: the operand is dropped
39	%endif
40	%else
41	ret ; not x86: the operand is dropped
42	%endif
43	%endmacro
44
45	;;
46	; NAME for fastcall functions: expands to NAME(a_Name) by default and to a_Prefix + a_Name + '@' + a_cbArgs on x86 Windows.
47	; The a_cbArgs and a_Prefix arguments are only used by the x86 Windows definition.
48	;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
49	; escaping (or whatever the dollar is good for here). Thus the ugly
50	; prefix argument.
51	;
52	%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
53	%ifdef RT_ARCH_X86
54	%ifdef RT_OS_WINDOWS
55	%undef NAME_FASTCALL
56	%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
57	%endif
58	%endif
59
60	;;
61	; BEGINPROC for fastcall functions: emits the export/global directives selected by the object format defines, then the entry label.
62	;
63	; @param 1 The function name (C).
64	; @param 2 The argument size on x86 (forwarded to NAME_FASTCALL as a_cbArgs).
65	; @remarks The export/global directives pass '$@' as the name prefix while the label definition passes '@' (see the NAME_FASTCALL @todo).
66	%macro BEGINPROC_FASTCALL 2
67	%ifdef ASM_FORMAT_PE
68	export %1=NAME_FASTCALL(%1,%2,$@)
69	%endif
70	%ifdef __NASM__
71	%ifdef ASM_FORMAT_OMF
72	export NAME(%1) NAME_FASTCALL(%1,%2,$@)
73	%endif
74	%endif
75	%ifndef ASM_FORMAT_BIN
76	global NAME_FASTCALL(%1,%2,$@)
77	%endif
78	NAME_FASTCALL(%1,%2,@):
79	%endmacro
80
81
82	;
83	; We employ some macro assembly here to hide the calling convention differences.
84	;
85	%ifdef RT_ARCH_AMD64
86	%macro PROLOGUE_1_ARGS 0
87	%endmacro
88	%macro EPILOGUE_1_ARGS 0
89	ret ; PROLOGUE_1_ARGS is empty on AMD64, so there is nothing to restore before returning
90	%endmacro
91	%macro EPILOGUE_1_ARGS_EX 0-1
92	ret ; the optional argument (the byte count the x86 variant requires) is accepted and ignored on AMD64
93	%endmacro
94
95	%macro PROLOGUE_2_ARGS 0
96	%endmacro
97	%macro EPILOGUE_2_ARGS 0
98	ret ; empty prologue on AMD64, nothing to restore
99	%endmacro
100	%macro EPILOGUE_2_ARGS_EX 1
101	ret ; the macro argument is accepted but not used on AMD64
102	%endmacro
103
104	%macro PROLOGUE_3_ARGS 0
105	%endmacro
106	%macro EPILOGUE_3_ARGS 0
107	ret ; empty prologue on AMD64, nothing to restore
108	%endmacro
109	%macro EPILOGUE_3_ARGS_EX 1
110	ret ; the macro argument is accepted but not used on AMD64
111	%endmacro
112
113	%macro PROLOGUE_4_ARGS 0
114	%endmacro
115	%macro EPILOGUE_4_ARGS 0
116	ret ; empty prologue on AMD64, nothing to restore
117	%endmacro
118	%macro EPILOGUE_4_ARGS_EX 1
119	ret ; the macro argument is accepted but not used on AMD64
120	%endmacro
121
122	%ifdef ASM_CALL64_GCC
123	%define A0 rdi
124	%define A0_32 edi
125	%define A0_16 di
126	%define A0_8 dil
127
128	%define A1 rsi
129	%define A1_32 esi
130	%define A1_16 si
131	%define A1_8 sil
132
133	%define A2 rdx
134	%define A2_32 edx
135	%define A2_16 dx
136	%define A2_8 dl
137
138	%define A3 rcx
139	%define A3_32 ecx
140	%define A3_16 cx
141	%endif
142
143	%ifdef ASM_CALL64_MSC
144	%define A0 rcx
145	%define A0_32 ecx
146	%define A0_16 cx
147	%define A0_8 cl
148
149	%define A1 rdx
150	%define A1_32 edx
151	%define A1_16 dx
152	%define A1_8 dl
153
154	%define A2 r8
155	%define A2_32 r8d
156	%define A2_16 r8w
157	%define A2_8 r8b
158
159	%define A3 r9
160	%define A3_32 r9d
161	%define A3_16 r9w
162	%endif
163
164	%define T0 rax
165	%define T0_32 eax
166	%define T0_16 ax
167	%define T0_8 al
168
169	%define T1 r11
170	%define T1_32 r11d
171	%define T1_16 r11w
172	%define T1_8 r11b
173
174	%else
175	; x86
176	%macro PROLOGUE_1_ARGS 0
177	push edi ; edi is T1 on x86; keep the caller's value
178	%endmacro
179	%macro EPILOGUE_1_ARGS 0
180	pop edi ; undo the push done by PROLOGUE_1_ARGS
181	ret 0
182	%endmacro
183	%macro EPILOGUE_1_ARGS_EX 1
184	pop edi ; undo the push done by PROLOGUE_1_ARGS
185	ret %1 ; the macro argument is the byte count handed to ret
186	%endmacro
187
188	%macro PROLOGUE_2_ARGS 0
189	push edi ; edi is T1 on x86; keep the caller's value
190	%endmacro
191	%macro EPILOGUE_2_ARGS 0
192	pop edi ; undo the push done by PROLOGUE_2_ARGS
193	ret 0
194	%endmacro
195	%macro EPILOGUE_2_ARGS_EX 1
196	pop edi ; undo the push done by PROLOGUE_2_ARGS
197	ret %1 ; the macro argument is the byte count handed to ret
198	%endmacro
199
200	%macro PROLOGUE_3_ARGS 0
201	push ebx ; ebx is A2 on x86; keep the caller's value
202	mov ebx, [esp + 4 + 4] ; A2 = dword above the saved ebx (+4) and the return address (+4)
203	push edi ; edi is T1 on x86; keep the caller's value
204	%endmacro
205	%macro EPILOGUE_3_ARGS_EX 1
206	%if (%1) < 4
207	%error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
208	%endif
209	pop edi ; restore in reverse order of the PROLOGUE_3_ARGS pushes
210	pop ebx
211	ret %1 ; the macro argument is the byte count handed to ret (checked above to be at least 4)
212	%endmacro
213	%macro EPILOGUE_3_ARGS 0
214	EPILOGUE_3_ARGS_EX 4
215	%endmacro
216
217	%macro PROLOGUE_4_ARGS 0
218	push ebx ; ebx is A2 on x86; keep the caller's value
219	push edi ; edi is T1 on x86; keep the caller's value
220	push esi ; esi is A3 on x86; keep the caller's value
221	mov ebx, [esp + 12 + 4 + 0] ; A2 = first dword above the three saved registers (+12) and the return address (+4)
222	mov esi, [esp + 12 + 4 + 4] ; A3 = the dword following it
223	%endmacro
224	%macro EPILOGUE_4_ARGS_EX 1
225	%if (%1) < 8
226	%error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
227	%endif
228	pop esi ; restore in reverse order of the PROLOGUE_4_ARGS pushes
229	pop edi
230	pop ebx
231	ret %1 ; the macro argument is the byte count handed to ret (checked above to be at least 8)
232	%endmacro
233	%macro EPILOGUE_4_ARGS 0
234	EPILOGUE_4_ARGS_EX 8
235	%endmacro
236
237	%define A0 ecx
238	%define A0_32 ecx
239	%define A0_16 cx
240	%define A0_8 cl
241
242	%define A1 edx
243	%define A1_32 edx
244	%define A1_16 dx
245	%define A1_8 dl
246
247	%define A2 ebx
248	%define A2_32 ebx
249	%define A2_16 bx
250	%define A2_8 bl
251
252	%define A3 esi
253	%define A3_32 esi
254	%define A3_16 si
255
256	%define T0 eax
257	%define T0_32 eax
258	%define T0_16 ax
259	%define T0_8 al
260
261	%define T1 edi
262	%define T1_32 edi
263	%define T1_16 di
264	%endif
265
266
267	;;
268	; Load the guest flags selected by (%2 | %3) from [%1] into the host EFLAGS, leaving the other host flag bits as they are.
269	;
270	; @remarks Clobbers T0, stack. Changes EFLAGS.
271	; @remarks The '%if (%3) != 0' guard in the body is commented out, so the load is done even when %3 is zero.
272	; @param 1 The register pointing to the eflags (A0..A3, or another register as in iemAImpl_cmpxchg8b).
273	; @param 2 The set of modified flags.
274	; @param 3 The set of undefined flags.
275	;
276	%macro IEM_MAYBE_LOAD_FLAGS 3
277	;%if (%3) != 0
278	pushf ; store current flags
279	mov T0_32, [%1] ; load the guest flags
280	and dword [xSP], ~(%2 \| %3) ; mask out the modified and undefined flags
281	and T0_32, (%2 \| %3) ; select the modified and undefined flags.
282	or [xSP], T0 ; merge guest flags with host flags.
283	popf ; load the mixed flags.
284	;%endif
285	%endmacro
286
287	;;
288	; Copy the host EFLAGS bits selected by (%2 | %3) into the 32-bit flags variable at [%1]; expands to nothing when both masks are zero.
289	;
290	; @remarks Clobbers T0, T1, stack.
291	; @param 1 The register pointing to the EFLAGS.
292	; @param 2 The mask of modified flags to save.
293	; @param 3 The mask of undefined flags; these are saved together with the modified ones.
294	;
295	%macro IEM_SAVE_FLAGS 3
296	%if (%2 \| %3) != 0
297	pushf ; push the current host flags ...
298	pop T1 ; ... and fetch them into T1
299	mov T0_32, [%1] ; flags
300	and T0_32, ~(%2 \| %3) ; clear the modified & undefined flags.
301	and T1_32, (%2 \| %3) ; select the modified and undefined flags.
302	or T0_32, T1_32 ; combine the flags.
303	mov [%1], T0_32 ; save the flags.
304	%endif
305	%endmacro
306
307
308	;;
309	; Macro for implementing a binary operator.
310	;
311	; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
312	; variants, except on 32-bit system where the 64-bit accesses requires hand
313	; coding.
314	;
315	; All the functions takes a pointer to the destination memory operand in A0,
316	; the source register operand in A1 and a pointer to eflags in A2.
317	;
318	; @param 1 The instruction mnemonic.
319	; @param 2 Non-zero if there should be a locked version.
320	; @param 3 The modified flags.
321	; @param 4 The undefined flags.
322	;
323	%macro IEMIMPL_BIN_OP 4
324	BEGINCODE
325	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
326	PROLOGUE_3_ARGS
327	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
328	%1 byte [A0], A1_8
329	IEM_SAVE_FLAGS A2, %3, %4
330	EPILOGUE_3_ARGS
331	ENDPROC iemAImpl_ %+ %1 %+ _u8
332
333	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
334	PROLOGUE_3_ARGS
335	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
336	%1 word [A0], A1_16
337	IEM_SAVE_FLAGS A2, %3, %4
338	EPILOGUE_3_ARGS
339	ENDPROC iemAImpl_ %+ %1 %+ _u16
340
341	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
342	PROLOGUE_3_ARGS
343	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
344	%1 dword [A0], A1_32
345	IEM_SAVE_FLAGS A2, %3, %4
346	EPILOGUE_3_ARGS
347	ENDPROC iemAImpl_ %+ %1 %+ _u32
348
349	%ifdef RT_ARCH_AMD64
350	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
351	PROLOGUE_3_ARGS
352	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
353	%1 qword [A0], A1
354	IEM_SAVE_FLAGS A2, %3, %4
355	EPILOGUE_3_ARGS_EX 8
356	ENDPROC iemAImpl_ %+ %1 %+ _u64
357	%endif ; RT_ARCH_AMD64
358
359	%if %2 != 0 ; locked versions requested?
360
361	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
362	PROLOGUE_3_ARGS
363	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
364	lock %1 byte [A0], A1_8
365	IEM_SAVE_FLAGS A2, %3, %4
366	EPILOGUE_3_ARGS
367	ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
368
369	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
370	PROLOGUE_3_ARGS
371	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
372	lock %1 word [A0], A1_16
373	IEM_SAVE_FLAGS A2, %3, %4
374	EPILOGUE_3_ARGS
375	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
376
377	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
378	PROLOGUE_3_ARGS
379	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
380	lock %1 dword [A0], A1_32
381	IEM_SAVE_FLAGS A2, %3, %4
382	EPILOGUE_3_ARGS
383	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
384
385	%ifdef RT_ARCH_AMD64
386	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
387	PROLOGUE_3_ARGS
388	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
389	lock %1 qword [A0], A1
390	IEM_SAVE_FLAGS A2, %3, %4
391	EPILOGUE_3_ARGS_EX 8
392	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
393	%endif ; RT_ARCH_AMD64
394	%endif ; locked
395	%endmacro
396
397	; instr,lock,modified-flags.
398	IEMIMPL_BIN_OP add, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
399	IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
400	IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
401	IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
402	IEMIMPL_BIN_OP or, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF
403	IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF
404	IEMIMPL_BIN_OP and, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF
405	IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
406	IEMIMPL_BIN_OP test, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF
407
408
409	;;
410	; Macro for implementing a bit operator.
411	;
412	; This will generate code for the 16, 32 and 64 bit accesses with locked
413	; variants, except on 32-bit system where the 64-bit accesses requires hand
414	; coding.
415	;
416	; All the functions takes a pointer to the destination memory operand in A0,
417	; the source register operand in A1 and a pointer to eflags in A2.
418	;
419	; @param 1 The instruction mnemonic.
420	; @param 2 Non-zero if there should be a locked version.
421	; @param 3 The modified flags.
422	; @param 4 The undefined flags.
423	;
424	%macro IEMIMPL_BIT_OP 4
425	BEGINCODE
426	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
427	PROLOGUE_3_ARGS
428	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
429	%1 word [A0], A1_16
430	IEM_SAVE_FLAGS A2, %3, %4
431	EPILOGUE_3_ARGS
432	ENDPROC iemAImpl_ %+ %1 %+ _u16
433
434	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
435	PROLOGUE_3_ARGS
436	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
437	%1 dword [A0], A1_32
438	IEM_SAVE_FLAGS A2, %3, %4
439	EPILOGUE_3_ARGS
440	ENDPROC iemAImpl_ %+ %1 %+ _u32
441
442	%ifdef RT_ARCH_AMD64
443	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
444	PROLOGUE_3_ARGS
445	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
446	%1 qword [A0], A1
447	IEM_SAVE_FLAGS A2, %3, %4
448	EPILOGUE_3_ARGS_EX 8
449	ENDPROC iemAImpl_ %+ %1 %+ _u64
450	%endif ; RT_ARCH_AMD64
451
452	%if %2 != 0 ; locked versions requested?
453
454	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
455	PROLOGUE_3_ARGS
456	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
457	lock %1 word [A0], A1_16
458	IEM_SAVE_FLAGS A2, %3, %4
459	EPILOGUE_3_ARGS
460	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
461
462	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
463	PROLOGUE_3_ARGS
464	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
465	lock %1 dword [A0], A1_32
466	IEM_SAVE_FLAGS A2, %3, %4
467	EPILOGUE_3_ARGS
468	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
469
470	%ifdef RT_ARCH_AMD64
471	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
472	PROLOGUE_3_ARGS
473	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
474	lock %1 qword [A0], A1
475	IEM_SAVE_FLAGS A2, %3, %4
476	EPILOGUE_3_ARGS_EX 8
477	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
478	%endif ; RT_ARCH_AMD64
479	%endif ; locked
480	%endmacro
481	IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
482	IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
483	IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
484	IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
485
486	;;
487	; Macro for implementing a bit search operator (shares its name with the 4-parameter bit operator macro; the parameter count selects this one).
488	;
489	; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
490	; system where the 64-bit accesses requires hand coding.
491	;
492	; All the functions takes a pointer to the destination memory operand in A0,
493	; the source register operand in A1 and a pointer to eflags in A2.
494	; The destination is only written when the instruction leaves ZF clear.
495	; @param 1 The instruction mnemonic.
496	; @param 2 The modified flags.
497	; @param 3 The undefined flags.
498	;
499	%macro IEMIMPL_BIT_OP 3
500	BEGINCODE
501	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
502	PROLOGUE_3_ARGS
503	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
504	%1 T0_16, A1_16 ; the result goes to the T0 temporary first, not straight to memory
505	jz .unchanged_dst ; ZF set: skip the store
506	mov [A0], T0_16 ; ZF clear: store the result in the destination
507	.unchanged_dst:
508	IEM_SAVE_FLAGS A2, %2, %3
509	EPILOGUE_3_ARGS
510	ENDPROC iemAImpl_ %+ %1 %+ _u16
511
512	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
513	PROLOGUE_3_ARGS
514	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
515	%1 T0_32, A1_32 ; the result goes to the T0 temporary first, not straight to memory
516	jz .unchanged_dst ; ZF set: skip the store
517	mov [A0], T0_32 ; ZF clear: store the result in the destination
518	.unchanged_dst:
519	IEM_SAVE_FLAGS A2, %2, %3
520	EPILOGUE_3_ARGS
521	ENDPROC iemAImpl_ %+ %1 %+ _u32
522
523	%ifdef RT_ARCH_AMD64
524	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
525	PROLOGUE_3_ARGS
526	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
527	%1 T0, A1 ; the result goes to the T0 temporary first, not straight to memory
528	jz .unchanged_dst ; ZF set: skip the store
529	mov [A0], T0 ; ZF clear: store the result in the destination
530	.unchanged_dst:
531	IEM_SAVE_FLAGS A2, %2, %3
532	EPILOGUE_3_ARGS_EX 8
533	ENDPROC iemAImpl_ %+ %1 %+ _u64
534	%endif ; RT_ARCH_AMD64
535	%endmacro
536	IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
537	IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
538
539
540	;
541	; IMUL is also a similar but yet different case (no lock, no mem dst).
542	; The rDX:rAX variant of imul is handled together with mul further down.
543	;
544	BEGINCODE
545	BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
546	PROLOGUE_3_ARGS
547	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
548	imul A1_16, word [A0]
549	mov [A0], A1_16
550	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
551	EPILOGUE_3_ARGS
552	ENDPROC iemAImpl_imul_two_u16
553
554	BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
555	PROLOGUE_3_ARGS
556	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
557	imul A1_32, dword [A0]
558	mov [A0], A1_32
559	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
560	EPILOGUE_3_ARGS
561	ENDPROC iemAImpl_imul_two_u32
562
563	%ifdef RT_ARCH_AMD64
564	BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
565	PROLOGUE_3_ARGS
566	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
567	imul A1, qword [A0]
568	mov [A0], A1
569	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
570	EPILOGUE_3_ARGS_EX 8
571	ENDPROC iemAImpl_imul_two_u64
572	%endif ; RT_ARCH_AMD64
573
574
575	;
576	; XCHG for memory operands. This implies locking. No flag changes.
577	;
578	; Each function takes two arguments, first the pointer to the memory,
579	; then the pointer to the register. They all return void.
580	;
581	BEGINCODE
582	BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
583	PROLOGUE_2_ARGS
584	mov T0_8, [A1]
585	xchg [A0], T0_8
586	mov [A1], T0_8
587	EPILOGUE_2_ARGS
588	ENDPROC iemAImpl_xchg_u8
589
590	BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
591	PROLOGUE_2_ARGS
592	mov T0_16, [A1]
593	xchg [A0], T0_16
594	mov [A1], T0_16
595	EPILOGUE_2_ARGS
596	ENDPROC iemAImpl_xchg_u16
597
598	BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
599	PROLOGUE_2_ARGS
600	mov T0_32, [A1]
601	xchg [A0], T0_32
602	mov [A1], T0_32
603	EPILOGUE_2_ARGS
604	ENDPROC iemAImpl_xchg_u32
605
606	%ifdef RT_ARCH_AMD64
607	BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
608	PROLOGUE_2_ARGS
609	mov T0, [A1]
610	xchg [A0], T0
611	mov [A1], T0
612	EPILOGUE_2_ARGS
613	ENDPROC iemAImpl_xchg_u64
614	%endif
615
616
617	;
618	; XADD for memory operands.
619	;
620	; Each function takes three arguments, first the pointer to the
621	; memory/register, then the pointer to the register, and finally a pointer to
622	; eflags. They all return void.
623	;
624	BEGINCODE
625	BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
626	PROLOGUE_3_ARGS
627	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
628	mov T0_8, [A1]
629	xadd [A0], T0_8
630	mov [A1], T0_8
631	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
632	EPILOGUE_3_ARGS
633	ENDPROC iemAImpl_xadd_u8
634
635	BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
636	PROLOGUE_3_ARGS
637	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
638	mov T0_16, [A1]
639	xadd [A0], T0_16
640	mov [A1], T0_16
641	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
642	EPILOGUE_3_ARGS
643	ENDPROC iemAImpl_xadd_u16
644
645	BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
646	PROLOGUE_3_ARGS
647	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
648	mov T0_32, [A1]
649	xadd [A0], T0_32
650	mov [A1], T0_32
651	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
652	EPILOGUE_3_ARGS
653	ENDPROC iemAImpl_xadd_u32
654
655	%ifdef RT_ARCH_AMD64
656	BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
657	PROLOGUE_3_ARGS
658	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
659	mov T0, [A1]
660	xadd [A0], T0
661	mov [A1], T0
662	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
663	EPILOGUE_3_ARGS
664	ENDPROC iemAImpl_xadd_u64
665	%endif ; RT_ARCH_AMD64
666
667	BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
668	PROLOGUE_3_ARGS
669	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
670	mov T0_8, [A1]
671	lock xadd [A0], T0_8
672	mov [A1], T0_8
673	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
674	EPILOGUE_3_ARGS
675	ENDPROC iemAImpl_xadd_u8_locked
676
677	BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
678	PROLOGUE_3_ARGS
679	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
680	mov T0_16, [A1]
681	lock xadd [A0], T0_16
682	mov [A1], T0_16
683	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
684	EPILOGUE_3_ARGS
685	ENDPROC iemAImpl_xadd_u16_locked
686
687	BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
688	PROLOGUE_3_ARGS
689	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
690	mov T0_32, [A1]
691	lock xadd [A0], T0_32
692	mov [A1], T0_32
693	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
694	EPILOGUE_3_ARGS
695	ENDPROC iemAImpl_xadd_u32_locked
696
697	%ifdef RT_ARCH_AMD64
698	BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
699	PROLOGUE_3_ARGS
700	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
701	mov T0, [A1]
702	lock xadd [A0], T0
703	mov [A1], T0
704	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
705	EPILOGUE_3_ARGS
706	ENDPROC iemAImpl_xadd_u64_locked
707	%endif ; RT_ARCH_AMD64
708
709
710	;
711	; CMPXCHG8B.
712	;
713	; These are tricky register wise, so the code is duplicated for each calling
714	; convention.
715	;
716	; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
717	;
718	; C-proto:
719	; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
720	; uint32_t *pEFlags));
721	;
722	BEGINCODE
723	BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
724	%ifdef RT_ARCH_AMD64
725	%ifdef ASM_CALL64_MSC
726	push rbx ; ebx is loaded below as an implicit cmpxchg8b operand; keep the caller's rbx
727
728	mov r11, rdx ; pu64EaxEdx (is also T1)
729	mov r10, rcx ; pu64Dst
730
731	mov ebx, [r8] ; ecx:ebx = the value read from pu64EbxEcx
732	mov ecx, [r8 + 4]
733	IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
734	mov eax, [r11] ; edx:eax = the value read from pu64EaxEdx (loaded after the flag macro, which clobbers eax)
735	mov edx, [r11 + 4]
736
737	lock cmpxchg8b [r10]
738
739	mov [r11], eax ; write edx:eax back to pu64EaxEdx
740	mov [r11 + 4], edx
741	IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
742
743	pop rbx
744	ret
745	%else
746	push rbx ; 64-bit path taken when ASM_CALL64_MSC is not defined; keep the caller's rbx
747
748	mov r10, rcx ; pEFlags
749	mov r11, rdx ; pu64EbxEcx (is also T1)
750
751	mov ebx, [r11] ; ecx:ebx = the value read from pu64EbxEcx
752	mov ecx, [r11 + 4]
753	IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
754	mov eax, [rsi] ; edx:eax = the value read from pu64EaxEdx (loaded after the flag macro, which clobbers eax)
755	mov edx, [rsi + 4]
756
757	lock cmpxchg8b [rdi]
758
759	mov [rsi], eax ; write edx:eax back to pu64EaxEdx
760	mov [rsi + 4], edx
761	IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
762
763	pop rbx
764	ret
765
766	%endif
767	%else
768	push esi ; 32-bit path: save the four registers repurposed below
769	push edi
770	push ebx
771	push ebp
772
773	mov edi, ecx ; pu64Dst
774	mov esi, edx ; pu64EaxEdx
775	mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
776	mov ebp, [esp + 16 + 4 + 4] ; pEFlags
777
778	mov ebx, [ecx] ; ecx:ebx = the value read from pu64EbxEcx (low dword first, since ecx is still the pointer)
779	mov ecx, [ecx + 4]
780	IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
781	mov eax, [esi] ; edx:eax = the value read from pu64EaxEdx (loaded after the flag macro, which clobbers eax)
782	mov edx, [esi + 4]
783
784	lock cmpxchg8b [edi]
785
786	mov [esi], eax ; write edx:eax back to pu64EaxEdx
787	mov [esi + 4], edx
788	IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
789
790	pop ebp ; restore in reverse order of the pushes above
791	pop ebx
792	pop edi
793	pop esi
794	ret 8
795	%endif
796	ENDPROC iemAImpl_cmpxchg8b
797
798	BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
799	; Lazy bird always lock prefixes cmpxchg8b, so the locked variant just jumps to iemAImpl_cmpxchg8b.
800	jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
801	ENDPROC iemAImpl_cmpxchg8b_locked
802
803
804
805	;
806	; CMPXCHG.
807	;
808	; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
809	;
810	; C-proto:
811	; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
812	;
813	BEGINCODE
814	%macro IEMIMPL_CMPXCHG 2
815	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
816	PROLOGUE_4_ARGS
817	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
818	mov al, [A1] ; al = accumulator value read through A1
819	%1 cmpxchg [A0], A2_8
820	mov [A1], al ; write the accumulator back through A1
821	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
822	EPILOGUE_4_ARGS
823	ENDPROC iemAImpl_cmpxchg_u8 %+ %2
824
825	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
826	PROLOGUE_4_ARGS
827	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
828	mov ax, [A1] ; ax = accumulator value read through A1
829	%1 cmpxchg [A0], A2_16
830	mov [A1], ax ; write the accumulator back through A1
831	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
832	EPILOGUE_4_ARGS
833	ENDPROC iemAImpl_cmpxchg_u16 %+ %2
834
835	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
836	PROLOGUE_4_ARGS
837	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
838	mov eax, [A1] ; eax = accumulator value read through A1
839	%1 cmpxchg [A0], A2_32
840	mov [A1], eax ; write the accumulator back through A1
841	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
842	EPILOGUE_4_ARGS
843	ENDPROC iemAImpl_cmpxchg_u32 %+ %2
844
845	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
846	%ifdef RT_ARCH_AMD64
847	PROLOGUE_4_ARGS
848	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
849	mov rax, [A1] ; rax = accumulator value read through A1
850	%1 cmpxchg [A0], A2
851	mov [A1], rax ; write the accumulator back through A1
852	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
853	EPILOGUE_4_ARGS
854	%else
855	;
856	; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b. Note that the lock prefix is always used on this path.
857	;
858	push esi
859	push edi
860	push ebx
861	push ebp
862
863	mov edi, ecx ; pu64Dst
864	mov esi, edx ; pu64Rax
865	mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
866	mov ebp, [esp + 16 + 4 + 4] ; pEFlags
867
868	mov ebx, [ecx] ; ecx:ebx = the value read from pu64Reg
869	mov ecx, [ecx + 4]
870	IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
871	mov eax, [esi] ; edx:eax = the value read from pu64Rax
872	mov edx, [esi + 4]
873
874	lock cmpxchg8b [edi]
875
876	; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that. It sets ZF when the compare succeeded.
877	jnz .cmpxchg8b_not_equal ; ZF clear: the compare failed and edx:eax now holds the destination value
878	cmp eax, eax ; equal: just set the other flags (ZF stays set).
879	.store:
880	mov [esi], eax
881	mov [esi + 4], edx
882	IEM_SAVE_FLAGS ebp, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
883
884	pop ebp
885	pop ebx
886	pop edi
887	pop esi
888	ret 8
889
890	.cmpxchg8b_not_equal:
891	cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
892	jne .store ; the high dwords differ: use the flags of the high dword compare
893	cmp [esi], eax ; high dwords equal: [esi] still holds the original low dword, compare it with the destination's
894	jmp .store
895
896	%endif
897	ENDPROC iemAImpl_cmpxchg_u64 %+ %2
898	%endmacro ; IEMIMPL_CMPXCHG
899
900	IEMIMPL_CMPXCHG , ,
901	IEMIMPL_CMPXCHG lock, _locked
902
903	;;
904	; Macro for implementing a unary operator.
905	;
906	; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
907	; variants, except on 32-bit system where the 64-bit accesses requires hand
908	; coding.
909	;
910	; All the functions take a pointer to the destination memory operand in A0
911	; and a pointer to eflags in A1; there is no source register operand.
912	;
913	; @param 1 The instruction mnemonic.
914	; @param 2 The modified flags.
915	; @param 3 The undefined flags.
916	;
917	%macro IEMIMPL_UNARY_OP 3
918	BEGINCODE
919	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
920	PROLOGUE_2_ARGS
921	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
922	%1 byte [A0]
923	IEM_SAVE_FLAGS A1, %2, %3
924	EPILOGUE_2_ARGS
925	ENDPROC iemAImpl_ %+ %1 %+ _u8
926
927	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
928	PROLOGUE_2_ARGS
929	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
930	lock %1 byte [A0]
931	IEM_SAVE_FLAGS A1, %2, %3
932	EPILOGUE_2_ARGS
933	ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
934
935	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
936	PROLOGUE_2_ARGS
937	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
938	%1 word [A0]
939	IEM_SAVE_FLAGS A1, %2, %3
940	EPILOGUE_2_ARGS
941	ENDPROC iemAImpl_ %+ %1 %+ _u16
942
943	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
944	PROLOGUE_2_ARGS
945	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
946	lock %1 word [A0]
947	IEM_SAVE_FLAGS A1, %2, %3
948	EPILOGUE_2_ARGS
949	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
950
951	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
952	PROLOGUE_2_ARGS
953	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
954	%1 dword [A0]
955	IEM_SAVE_FLAGS A1, %2, %3
956	EPILOGUE_2_ARGS
957	ENDPROC iemAImpl_ %+ %1 %+ _u32
958
959	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
960	PROLOGUE_2_ARGS
961	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
962	lock %1 dword [A0]
963	IEM_SAVE_FLAGS A1, %2, %3
964	EPILOGUE_2_ARGS
965	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
966
967	%ifdef RT_ARCH_AMD64
968	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
969	PROLOGUE_2_ARGS
970	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
971	%1 qword [A0]
972	IEM_SAVE_FLAGS A1, %2, %3
973	EPILOGUE_2_ARGS
974	ENDPROC iemAImpl_ %+ %1 %+ _u64
975
976	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
977	PROLOGUE_2_ARGS
978	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
979	lock %1 qword [A0]
980	IEM_SAVE_FLAGS A1, %2, %3
981	EPILOGUE_2_ARGS
982	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
983	%endif ; RT_ARCH_AMD64
984
985	%endmacro
986
987	IEMIMPL_UNARY_OP inc, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF), 0
988	IEMIMPL_UNARY_OP dec, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF), 0
989	IEMIMPL_UNARY_OP neg, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
990	IEMIMPL_UNARY_OP not, 0, 0
991
992
993	;;
994	; Macro for implementing a memory fence operation: defines a function that executes the given instruction and returns.
995	;
996	; No return value, no operands or anything.
997	;
998	; @param 1 The instruction (also used as the suffix of the function name).
999	;
1000	%macro IEMIMPL_MEM_FENCE 1
1001	BEGINCODE
1002	BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
1003	%1
1004	ret ; plain ret: the function is declared with 0 argument bytes
1005	ENDPROC iemAImpl_ %+ %1
1006	%endmacro
1007
1008	IEMIMPL_MEM_FENCE lfence
1009	IEMIMPL_MEM_FENCE sfence
1010	IEMIMPL_MEM_FENCE mfence
1011
1012	;;
1013	; Alternative for non-SSE2 host: uses an xchg with a stack slot instead of a fence instruction.
1014	;
1015	BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
1016	push xAX ; create a scratch stack slot holding a copy of xAX
1017	xchg xAX, [xSP] ; xchg with a memory operand is implicitly locked; xAX keeps its value since the slot holds the same value
1018	add xSP, xCB ; drop the scratch slot (xCB is presumably the stack slot size from asmdefs.mac - confirm)
1019	ret
1020	ENDPROC iemAImpl_alt_mem_fence
1021
1022
1023
1024	;;
1025	; Macro for implementing a shift operation.
1026	;
1027	; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1028	; 32-bit system where the 64-bit accesses requires hand coding.
1029	;
1030	; All the functions takes a pointer to the destination memory operand in A0,
1031	; the shift count in A1 and a pointer to eflags in A2.
1032	;
1033	; @param 1 The instruction mnemonic.
1034	; @param 2 The modified flags.
1035	; @param 3 The undefined flags.
1036	;
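1037	; Makes ASSUMPTIONS about A0, A1 and A2 assignments: the non-GCC path needs A0 to be xCX so that 'xchg A1, A0' leaves the count in CL, and the GCC path needs A0 and A2 not to be rCX since it overwrites CL.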
1038	;
1039	%macro IEMIMPL_SHIFT_OP 3
1040	BEGINCODE
1041	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1042	PROLOGUE_3_ARGS
1043	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1044	%ifdef ASM_CALL64_GCC
1045	mov cl, A1_8
1046	%1 byte [A0], cl
1047	%else
1048	xchg A1, A0
1049	%1 byte [A1], cl
1050	%endif
1051	IEM_SAVE_FLAGS A2, %2, %3
1052	EPILOGUE_3_ARGS
1053	ENDPROC iemAImpl_ %+ %1 %+ _u8
1054
1055	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1056	PROLOGUE_3_ARGS
1057	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1058	%ifdef ASM_CALL64_GCC
1059	mov cl, A1_8
1060	%1 word [A0], cl
1061	%else
1062	xchg A1, A0
1063	%1 word [A1], cl
1064	%endif
1065	IEM_SAVE_FLAGS A2, %2, %3
1066	EPILOGUE_3_ARGS
1067	ENDPROC iemAImpl_ %+ %1 %+ _u16
1068
1069	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1070	PROLOGUE_3_ARGS
1071	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1072	%ifdef ASM_CALL64_GCC
1073	mov cl, A1_8
1074	%1 dword [A0], cl
1075	%else
1076	xchg A1, A0
1077	%1 dword [A1], cl
1078	%endif
1079	IEM_SAVE_FLAGS A2, %2, %3
1080	EPILOGUE_3_ARGS
1081	ENDPROC iemAImpl_ %+ %1 %+ _u32
1082
1083	%ifdef RT_ARCH_AMD64
1084	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1085	PROLOGUE_3_ARGS
1086	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1087	%ifdef ASM_CALL64_GCC
1088	mov cl, A1_8
1089	%1 qword [A0], cl
1090	%else
1091	xchg A1, A0
1092	%1 qword [A1], cl
1093	%endif
1094	IEM_SAVE_FLAGS A2, %2, %3
1095	EPILOGUE_3_ARGS
1096	ENDPROC iemAImpl_ %+ %1 %+ _u64
1097	%endif ; RT_ARCH_AMD64
1098
1099	%endmacro
1100
; Instantiate the rotate and shift workers.  Second argument: flags the
; instruction modifies; third argument: flags it leaves undefined.
IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1108
1109
;;
; Macro for implementing a double precision shift operation.
;
; Expands to the 16 and 32-bit workers, plus the 64-bit worker when
; RT_ARCH_AMD64 is defined (32-bit hosts need a hand coded 64-bit variant).
;
; The functions take the destination operand (r/m) in A0, the source (reg) in
; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments: the count has to end
; up in cl.  With ASM_CALL64_GCC this is done by temporarily swapping A2 and
; A3 (swapped back before the flags are saved through A3); otherwise A0 and A2
; are swapped and the destination is addressed through A2.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifndef ASM_CALL64_GCC
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1_16, cl
%else
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifndef ASM_CALL64_GCC
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1_32, cl
%else
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_32, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifndef ASM_CALL64_GCC
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1, cl
 %else
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
%endif ; RT_ARCH_AMD64

%endmacro
1175
; Instantiate the double precision shift workers (modified flags, undefined flags).
IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1178
1179
;;
; Macro for implementing a multiplication operation.
;
; Expands to the 8, 16 and 32-bit workers, plus the 64-bit worker when
; RT_ARCH_AMD64 is defined (the 32-bit host version lives in IEMAllAImplC.cpp).
;
; The 8-bit function only operates on AX, so it takes no DX pointer: pointer to
; AX in A0, the operand in A1 and a pointer to eflags in A2.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the operand in
; A2 and a pointer to eflags in A3.
;
; The functions all return 0 so the caller can be used for div/idiv as well as
; for the mul/imul implementation.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.  Without
; ASM_CALL64_GCC the rDX pointer is parked in T1 across the multiplication
; and the high half is stored through T1 afterwards.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the 8-bit form produces a 16-bit result in AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
%ifndef ASM_CALL64_GCC
        mov     T1, A1                  ; park the rDX pointer
%endif
        %1      A2_16
        mov     [A0], ax
%ifdef ASM_CALL64_GCC
        mov     [A1], dx
%else
        mov     [T1], dx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
%ifndef ASM_CALL64_GCC
        mov     T1, A1                  ; park the rDX pointer
%endif
        %1      A2_32
        mov     [A0], eax
%ifdef ASM_CALL64_GCC
        mov     [A1], edx
%else
        mov     [T1], edx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

%ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
 %ifndef ASM_CALL64_GCC
        mov     T1, A1                  ; park the rDX pointer
 %endif
        %1      A2
        mov     [A0], rax
 %ifdef ASM_CALL64_GCC
        mov     [A1], rdx
 %else
        mov     [T1], rdx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
%endif ; RT_ARCH_AMD64

%endmacro
1272
; Instantiate the multiplication workers (modified flags, undefined flags).
IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1275
1276
BEGINCODE
;;
; Worker function for negating the 64-bit number held in the 32-bit register
; pair T1:T0 (T1 = high half, T0 = low half).
;
; Computes 0 - T1:T0 using two zeroed stack slots as scratch, so no register
; other than T0 and T1 is modified.  The arithmetic flags are clobbered.
;
; @uses     None (T0,T1)
iemAImpl_negate_T0_T1_u32:
        push    0
        push    0
        xchg    [xSP], T0_32            ; slot 0 = low half,  T0 = 0
        xchg    [xSP + xCB], T1_32      ; slot 1 = high half, T1 = 0
        sub     T0_32, [xSP]            ; low  = 0 - low
        sbb     T1_32, [xSP + xCB]      ; high = 0 - high - borrow
        add     xSP, 2 * xCB
        ret
1290
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating the 128-bit number held in the 64-bit register
; pair T1:T0 (T1 = high half, T0 = low half).
;
; Computes 0 - T1:T0 using two zeroed stack slots as scratch, so no register
; other than T0 and T1 is modified.  The arithmetic flags are clobbered.
;
; @uses     None (T0,T1)
iemAImpl_negate_T0_T1_u64:
        push    0
        push    0
        xchg    [xSP], T0               ; slot 0 = low half,  T0 = 0
        xchg    [xSP + xCB], T1         ; slot 1 = high half, T1 = 0
        sub     T0, [xSP]               ; low  = 0 - low
        sbb     T1, [xSP + xCB]         ; high = 0 - high - borrow
        add     xSP, 2 * xCB
        ret
%endif
1305
1306
;;
; Macro for implementing a division operation.
;
; Expands to the 8, 16 and 32-bit workers, plus the 64-bit worker when
; RT_ARCH_AMD64 is defined (the 32-bit host version lives in IEMAllAImplC.cpp).
;
; The 8-bit function only operates on AX, so it takes no DX pointer: pointer to
; AX in A0, the divisor in A1 and a pointer to eflags in A2.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the divisor in
; A2 and a pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  The host division instruction is only executed once
; the operands are known not to fault, i.e. after the divisor has been checked
; for zero and the quotient has been checked to fit the destination.
;
; Signed overflow check: both operands are converted to magnitudes.  A dividend
; that is still negative after negation is the most negative value; its
; magnitude can never yield a representable quotient, so it is rejected right
; away (the shift based comparison below would otherwise lose its top bit).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
%if %4 == 0
        cmp     [A0 + 1], A1_8          ; AH >= divisor -> quotient needs more than 8 bits
        jae     .div_overflow
%else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
        js      .div_overflow           ; dividend was 0x8000: magnitude too large for any divisor
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
        js      .div_overflow           ; dividend was 0x8000: magnitude too large for any divisor
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
%endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
%if %4 == 0
        cmp     [A1], A2_16             ; DX >= divisor -> quotient needs more than 16 bits
        jae     .div_overflow
%else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
        js      .div_overflow           ; dividend was 0x80000000: magnitude too large for any divisor
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
        js      .div_overflow           ; dividend was 0x80000000: magnitude too large for any divisor
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
%endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; move the divisor out of A2 before dx is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
%else
        mov     T1, A1                  ; park the rDX pointer
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
%if %4 == 0
        cmp     [A1], A2_32             ; EDX >= divisor -> quotient needs more than 32 bits
        jae     .div_overflow
%else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u32
        test    T1_32, T1_32            ; the worker does not leave usable flags behind
        js      .div_overflow           ; dividend was the most negative value
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u32
        test    T1_32, T1_32            ; the worker does not leave usable flags behind
        js      .div_overflow           ; dividend was the most negative value
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
%endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; move the divisor out of A2 before edx is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
%else
        mov     T1, A1                  ; park the rDX pointer
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_overflow:
%if %4 != 0
        pop     A2                      ; balance the push done before the signed overflow check
%endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

%ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero
 %if %4 == 0
        cmp     [A1], A2                ; RDX >= divisor -> quotient needs more than 64 bits
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u64
        test    T1, T1                  ; the worker does not leave usable flags behind
        js      .div_overflow           ; dividend was the most negative value
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u64
        test    T1, T1                  ; the worker does not leave usable flags behind
        js      .div_overflow           ; dividend was the most negative value
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; move the divisor out of A2 before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
 %else
        mov     T1, A1                  ; park the rDX pointer
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done before the signed overflow check
 %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
%endif ; RT_ARCH_AMD64

%endmacro
1628
; Instantiate the division workers: no flags are defined afterwards, all six
; status flags are undefined; the last argument selects signed (1) or unsigned (0).
IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1631
1632
;
; BSWAP.  No flag changes.
;
; Each function takes one argument, a pointer to the value to bswap
; (input/output).  They all return void.
;

;; 16-bit variant: executes BSWAP with an operand-size prefix on the 32-bit
; register and writes all 32 bits back.
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; just in case any of the upper bits are used.
        db      66h                     ; operand-size prefix for the bswap below
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16

;; 32-bit variant.
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32

;; 64-bit variant.  Without RT_ARCH_AMD64 the two halves are byte swapped
; individually and then stored in exchanged positions.
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifndef RT_ARCH_AMD64
        mov     T1, [A0 + 4]            ; high half
        mov     T0, [A0]                ; low half
        bswap   T1
        bswap   T0
        mov     [A0], T1                ; swapped high half becomes the new low half
        mov     [A0 + 4], T0            ; swapped low half becomes the new high half
%else
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1674
1675
;;
; Initialize the FPU for the actual instruction being emulated, this means
; loading parts of the guest's control word and status word.
;
; The current FPU environment is stored at [xSP], its FCW is replaced by the
; masked guest FCW, its FSW is rebuilt from the stored TOP field plus the
; guest's condition code bits, and the result is loaded back with fldenv.
;
; @uses     The memory at [xSP] as scratch for the environment image.  The
;           caller must have reserved it; the callers in this file reserve 20h
;           bytes, while the protected-mode 32-bit FNSTENV image is 28 bytes.
; @uses     T0 and T1 (clobbered), arithmetic flags.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1701
1702
;;
; Output of an FPU worker producing one 80-bit value: the value followed by
; the resulting status word.
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result  resb 10                 ; 80-bit result value
    .FSW        resb 2                  ; output FPU status word
endstruc
1710
1711
;;
; Output of an FPU worker producing two 80-bit values: first value, resulting
; status word, second value.
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1 resb 10                 ; first 80-bit result value
    .FSW        resb 2                  ; output FPU status word
    .r80Result2 resb 10                 ; second 80-bit result value
endstruc
1720
1721
1722	;
1723	;---------------------- 16-bit signed integer operations ----------------------
1724	;
1725
1726
;;
; Converts a 16-bit signed integer to an 80-bit floating point value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1750
1751
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1775
1776
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit store: the destination is only two bytes wide

        fnstsw  word  [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1801
1802
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value.
;
; Generates iemAImpl_<insn>_r80_by_i16, which stores both the resulting FSW and
; the 80-bit result in the IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1839
1840
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1871
1872
1873
1874	;
1875	;---------------------- 32-bit signed integer operations ----------------------
1876	;
1877
1878
;;
; Converts a 32-bit signed integer to an 80-bit floating point value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1902
1903
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1927
1928
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1953
1954
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value.
;
; Generates iemAImpl_<insn>_r80_by_i32, which stores both the resulting FSW and
; the 80-bit result in the IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1991
1992
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2023
2024
2025
2026	;
2027	;---------------------- 64-bit signed integer operations ----------------------
2028	;
2029
2030
;;
; Converts a 64-bit signed integer to an 80-bit floating point value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2054
2055
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2079
2080
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2105
2106
2107
2108	;
2109	;---------------------- 32-bit floating point operations ----------------------
2110	;
2111
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2135
2136
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; non-popping store; the fninit below discards the register

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2160
2161
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value.
;
; Generates iemAImpl_<insn>_r80_by_r32, which stores both the resulting FSW and
; the 80-bit result in the IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2198
2199
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2230
2231
2232
2233	;
2234	;---------------------- 64-bit floating point operations ----------------------
2235	;
2236
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit                          ; start from a clean host FPU state, like the sibling
                                        ; workers do, instead of inheriting whatever is live
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave a clean FPU state behind
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2259
2260
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A3]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; non-popping store; the fninit below discards the register

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2284
2285
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value.
;
; Generates iemAImpl_<insn>_r80_by_r64, which stores both the resulting FSW and
; the 80-bit result in the IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; capture FSW before fnclex clears the exception flags
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2322
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        fninit                          ; start from a clean host FPU state
        sub     xSP, 20h                ; scratch for the FPU environment image

        fld     tword [A2]              ; load the value before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1]

        add     xSP, 20h
        fninit                          ; leave a clean FPU state behind
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2353
2354
2355
2356	;
2357	;---------------------- 80-bit floating point operations ----------------------
2358	;
2359
2360	;;
2361	; Loads an 80-bit floating point register value from memory.
2362	; The value is pushed with fld and popped again into the result, together with the status word of the load.
2363	; @param A0 FPU context (fxsave).
2364	; @param A1 Pointer to an IEMFPURESULT for the output.
2365	; @param A2 Pointer to the 80-bit floating point value to load.
2366	;
2367	BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2368	PROLOGUE_3_ARGS
2369	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2370	; Here the context macro runs before the fld, because the load itself is the operation being performed.
2371	fninit ; clean host FPU, empty register stack
2372	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2373	fld tword [A2] ; the operation: push the 80-bit value
2374
2375	fnstsw word [A1 + IEMFPURESULT.FSW] ; status word as left by the load
2376	fnclex ; clear exception flags first: fstp is a waiting instruction and would otherwise act on a pending unmasked exception
2377	fstp tword [A1 + IEMFPURESULT.r80Result] ; pop the loaded value into the result structure
2378
2379	fninit ; leave nothing from this operation behind in the host FPU
2380	add xSP, 20h ; release the 32 bytes reserved above
2381	EPILOGUE_3_ARGS
2382	ENDPROC iemAImpl_fld_r80_from_r80
2383
2384
2385	;;
2386	; Store an 80-bit floating point register to memory
2387	; (implemented with fstp, so the value is popped off the scratch FPU stack while being stored).
2388	; @param A0 FPU context (fxsave).
2389	; @param A1 Where to return the output FSW.
2390	; @param A2 Where to store the 80-bit value.
2391	; @param A3 Pointer to the 80-bit register value.
2392	;
2393	BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2394	PROLOGUE_4_ARGS
2395	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2396
2397	fninit ; clean host FPU, empty register stack
2398	fld tword [A3] ; ST0 = the 80-bit register value
2399	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2400	fstp tword [A2] ; store and pop ST0 as an 80-bit value
2401
2402	fnstsw word [A1] ; status word as left by the store
2403
2404	fninit ; leave nothing from this operation behind in the host FPU
2405	add xSP, 20h ; release the 32 bytes reserved above
2406	EPILOGUE_4_ARGS
2407	ENDPROC iemAImpl_fst_r80_to_r80
2408
2409
2410	;;
2411	; FPU instruction working on two 80-bit floating point values.
2412	; Generates iemAImpl_<instr>_r80_by_r80 with [A2] in ST0 and [A3] in ST1; the result is taken from ST0.
2413	; @param 1 The instruction
2414	; @param 2 The operand list passed to the instruction ({st0, st1}, or {} when the operands are implicit).
2415	; @param A0 FPU context (fxsave).
2416	; @param A1 Pointer to an IEMFPURESULT for the output.
2417	; @param A2 Pointer to the first 80-bit value (ST0)
2418	; @param A3 Pointer to the second 80-bit value (STn).
2419	;
2420	%macro IEMIMPL_FPU_R80_BY_R80 2
2421	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2422	PROLOGUE_4_ARGS
2423	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2424
2425	fninit ; clean host FPU, empty register stack
2426	fld tword [A3] ; pushed first, so it ends up as ST1
2427	fld tword [A2] ; pushed last, so it is ST0
2428	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2429	%1 %2 ; the instruction with its operand list; result in ST0
2430
2431	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the instruction
2432	fnclex ; clear exception flags first: fstp is a waiting instruction and would otherwise act on a pending unmasked exception
2433	fstp tword [A1 + IEMFPURESULT.r80Result] ; pop ST0 into the result structure
2434
2435	fninit ; discards the remaining ST1 and any leftover state
2436	add xSP, 20h ; release the 32 bytes reserved above
2437	EPILOGUE_4_ARGS
2438	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2439	%endmacro
2440
2441	IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2442	IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2443	IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2444	IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2445	IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2446	IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2447	IEMIMPL_FPU_R80_BY_R80 fprem, {}
2448	IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2449	IEMIMPL_FPU_R80_BY_R80 fscale, {}
2450
2451
2452	;;
2453	; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2454	; storing the result in ST1 and popping the stack.
2455	; After the pop the result is the new ST0, which is what gets written to the IEMFPURESULT.
2456	; @param 1 The instruction
2457	;
2458	; @param A0 FPU context (fxsave).
2459	; @param A1 Pointer to an IEMFPURESULT for the output.
2460	; @param A2 Pointer to the first 80-bit value (ST1).
2461	; @param A3 Pointer to the second 80-bit value (ST0).
2462	;
2463	%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2464	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2465	PROLOGUE_4_ARGS
2466	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2467
2468	fninit ; clean host FPU, empty register stack
2469	fld tword [A2] ; pushed first, so it ends up as ST1
2470	fld tword [A3] ; pushed last, so it is ST0
2471	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2472	%1 ; implicit operands ST1 and ST0
2473
2474	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the instruction
2475	fnclex ; clear exception flags first: fstp is a waiting instruction and would otherwise act on a pending unmasked exception
2476	fstp tword [A1 + IEMFPURESULT.r80Result] ; pop the top of stack into the result structure
2477
2478	fninit ; leave nothing from this operation behind in the host FPU
2479	add xSP, 20h ; release the 32 bytes reserved above
2480	EPILOGUE_4_ARGS
2481	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2482	%endmacro
2483
2484	IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
2485	IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2486
2487
2488	;;
2489	; FPU instruction working on two 80-bit floating point values, only
2490	; returning FSW.
2491	; Generates iemAImpl_<instr>_r80_by_r80, which runs '<instr> st0, st1' with [A2] in ST0 and [A3] in ST1.
2492	; @param 1 The instruction
2493	;
2494	; @param A0 FPU context (fxsave).
2495	; @param A1 Pointer to a uint16_t for the resulting FSW.
2496	; @param A2 Pointer to the first 80-bit value.
2497	; @param A3 Pointer to the second 80-bit value.
2498	;
2499	%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2500	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2501	PROLOGUE_4_ARGS
2502	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2503
2504	fninit ; clean host FPU, empty register stack
2505	fld tword [A3] ; pushed first, so it ends up as ST1
2506	fld tword [A2] ; pushed last, so it is ST0
2507	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2508	%1 st0, st1 ; only the status word matters here
2509
2510	fnstsw word [A1] ; the status word is the only output
2511
2512	fninit ; discards both stack entries and any leftover state
2513	add xSP, 20h ; release the 32 bytes reserved above
2514	EPILOGUE_4_ARGS
2515	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2516	%endmacro
2517
2518	IEMIMPL_FPU_R80_BY_R80_FSW fcom
2519	IEMIMPL_FPU_R80_BY_R80_FSW fucom
2520
2521
2522	;;
2523	; FPU instruction working on two 80-bit floating point values,
2524	; returning FSW and EFLAGS (eax).
2525	; The whole flags register is returned as it stands right after the instruction; nothing is masked here.
2526	; @param 1 The instruction
2527	;
2528	; @returns EFLAGS in EAX.
2529	; @param A0 FPU context (fxsave).
2530	; @param A1 Pointer to a uint16_t for the resulting FSW.
2531	; @param A2 Pointer to the first 80-bit value.
2532	; @param A3 Pointer to the second 80-bit value.
2533	;
2534	%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2535	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2536	PROLOGUE_4_ARGS
2537	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2538
2539	fninit ; clean host FPU, empty register stack
2540	fld tword [A3] ; pushed first, so it ends up as ST1
2541	fld tword [A2] ; pushed last, so it is ST0
2542	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2543	%1 st1 ; compare ST0 with ST1; the outcome goes to the CPU flags
2544
2545	fnstsw word [A1] ; fnstsw does not touch the CPU flags, so they still hold the compare result
2546	pushf ; copy the flags register ...
2547	pop xAX ; ... into the return register before 'add' below overwrites the live flags
2548
2549	fninit ; discards both stack entries and any leftover state
2550	add xSP, 20h ; release the 32 bytes reserved above
2551	EPILOGUE_4_ARGS
2552	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2553	%endmacro
2554
2555	IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2556	IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2557
2558
2559	;;
2560	; FPU instruction working on one 80-bit floating point value.
2561	; Generates iemAImpl_<instr>_r80: the value is loaded into ST0, the instruction runs, ST0 is popped into the result.
2562	; @param 1 The instruction
2563	;
2564	; @param A0 FPU context (fxsave).
2565	; @param A1 Pointer to an IEMFPURESULT for the output.
2566	; @param A2 Pointer to the 80-bit value.
2567	;
2568	%macro IEMIMPL_FPU_R80 1
2569	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2570	PROLOGUE_3_ARGS
2571	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2572
2573	fninit ; clean host FPU, empty register stack
2574	fld tword [A2] ; ST0 = the operand
2575	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2576	%1 ; operates on ST0 in place
2577
2578	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the instruction
2579	fnclex ; clear exception flags first: fstp is a waiting instruction and would otherwise act on a pending unmasked exception
2580	fstp tword [A1 + IEMFPURESULT.r80Result] ; pop ST0 into the result structure
2581
2582	fninit ; leave nothing from this operation behind in the host FPU
2583	add xSP, 20h ; release the 32 bytes reserved above
2584	EPILOGUE_3_ARGS
2585	ENDPROC iemAImpl_ %+ %1 %+ _r80
2586	%endmacro
2587	; NOTE(review): fyl2x architecturally takes ST1 and ST0 and pops, but this template loads a single value; it looks like it belongs with fpatan/fyl2xp1. Moving it would rename the exported symbol, so confirm against the callers first.
2588	IEMIMPL_FPU_R80 fchs
2589	IEMIMPL_FPU_R80 fabs
2590	IEMIMPL_FPU_R80 f2xm1
2591	IEMIMPL_FPU_R80 fyl2x
2592	IEMIMPL_FPU_R80 fsqrt
2593	IEMIMPL_FPU_R80 frndint
2594	IEMIMPL_FPU_R80 fsin
2595	IEMIMPL_FPU_R80 fcos
2596
2597
2598	;;
2599	; FPU instruction working on one 80-bit floating point value, only
2600	; returning FSW.
2601	; Generates iemAImpl_<instr>_r80; the instruction examines ST0 and only the status word is handed back.
2602	; @param 1 The instruction
2603	;
2604	; @param A0 FPU context (fxsave).
2605	; @param A1 Pointer to a uint16_t for the resulting FSW.
2606	; @param A2 Pointer to the 80-bit value.
2607	;
2608	%macro IEMIMPL_FPU_R80_FSW 1
2609	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2610	PROLOGUE_3_ARGS
2611	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2612
2613	fninit ; clean host FPU, empty register stack
2614	fld tword [A2] ; ST0 = the value to examine
2615	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2616	%1 ; sets the condition code bits in the status word
2617
2618	fnstsw word [A1] ; the status word is the only output
2619
2620	fninit ; discards ST0 and any leftover state
2621	add xSP, 20h ; release the 32 bytes reserved above
2622	EPILOGUE_3_ARGS
2623	ENDPROC iemAImpl_ %+ %1 %+ _r80
2624	%endmacro
2625
2626	IEMIMPL_FPU_R80_FSW ftst
2627	IEMIMPL_FPU_R80_FSW fxam
2628
2629
2630
2631	;;
2632	; FPU instruction loading an 80-bit floating point constant.
2633	; Generates iemAImpl_<instr>: the instruction pushes its constant, which is then popped into the result.
2634	; @param 1 The instruction
2635	;
2636	; @param A0 FPU context (fxsave).
2637	; @param A1 Pointer to an IEMFPURESULT for the output.
2638	;
2639	%macro IEMIMPL_FPU_R80_CONST 1
2640	BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2641	PROLOGUE_2_ARGS
2642	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2643
2644	fninit ; clean host FPU, empty register stack
2645	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2646	%1 ; push the constant; it becomes ST0
2647
2648	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
2649	fnclex ; clear exception flags first: fstp is a waiting instruction and would otherwise act on a pending unmasked exception
2650	fstp tword [A1 + IEMFPURESULT.r80Result] ; pop the constant into the result structure
2651
2652	fninit ; leave nothing from this operation behind in the host FPU
2653	add xSP, 20h ; release the 32 bytes reserved above
2654	EPILOGUE_2_ARGS
2655	ENDPROC iemAImpl_ %+ %1
2656	%endmacro
2657
2658	IEMIMPL_FPU_R80_CONST fld1
2659	IEMIMPL_FPU_R80_CONST fldl2t
2660	IEMIMPL_FPU_R80_CONST fldl2e
2661	IEMIMPL_FPU_R80_CONST fldpi
2662	IEMIMPL_FPU_R80_CONST fldlg2
2663	IEMIMPL_FPU_R80_CONST fldln2
2664	IEMIMPL_FPU_R80_CONST fldz
2665
2666
2667	;;
2668	; FPU instruction working on one 80-bit floating point value, outputting two.
2669	; After the instruction, ST0 is stored to r80Result2 and the entry below it to r80Result1.
2670	; @param 1 The instruction
2671	;
2672	; @param A0 FPU context (fxsave).
2673	; @param A1 Pointer to an IEMFPURESULTTWO for the output.
2674	; @param A2 Pointer to the 80-bit value.
2675	;
2676	%macro IEMIMPL_FPU_R80_R80 1
2677	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2678	PROLOGUE_3_ARGS
2679	sub xSP, 20h ; 32 bytes of stack; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW (defined outside this view) - TODO confirm
2680
2681	fninit ; clean host FPU, empty register stack
2682	fld tword [A2] ; ST0 = the operand
2683	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2684	%1 ; replaces ST0 and pushes a second value
2685
2686	fnstsw word [A1 + IEMFPURESULTTWO.FSW] ; capture the status word produced by the instruction
2687	fnclex ; clear exception flags so the waiting fstp below does not act on a pending exception
2688	fstp tword [A1 + IEMFPURESULTTWO.r80Result2] ; top of stack (pushed last) is the second result
2689	fnclex ; the previous fstp may have raised flags of its own; clear again before the next store
2690	fstp tword [A1 + IEMFPURESULTTWO.r80Result1] ; the entry underneath is the first result
2691
2692	fninit ; leave nothing from this operation behind in the host FPU
2693	add xSP, 20h ; release the 32 bytes reserved above
2694	EPILOGUE_3_ARGS
2695	ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2696	%endmacro
2697
2698	IEMIMPL_FPU_R80_R80 fptan
2699	IEMIMPL_FPU_R80_R80 fxtract
2700	IEMIMPL_FPU_R80_R80 fsincos
2701
2702
2703
2704
2705	;---------------------- SSE and MMX Operations ----------------------
2706
2707	;; Hooks expanded at the start and end of every MMX helper below; currently empty. @todo what do we need to do for MMX?
2708	%macro IEMIMPL_MMX_PROLOGUE 0
2709	%endmacro
2710	%macro IEMIMPL_MMX_EPILOGUE 0
2711	%endmacro
2712
2713	;; Hooks expanded at the start and end of every SSE helper below; currently empty. @todo what do we need to do for SSE?
2714	%macro IEMIMPL_SSE_PROLOGUE 0
2715	%endmacro
2716	%macro IEMIMPL_SSE_EPILOGUE 0
2717	%endmacro
2718
2719
2720	;;
2721	; Two-operand media instruction on full width registers, in an MMX and an SSE flavour.
2722	; Emits iemAImpl_<instr>_u64 (mm0/mm1, 64-bit) and iemAImpl_<instr>_u128 (xmm0/xmm1, 128-bit).
2723	; @param 1 The instruction
2724	;
2725	; @param A0 FPU context (fxsave); not referenced by the code below.
2726	; @param A1 Pointer to the first media register size operand (input/output).
2727	; @param A2 Pointer to the second media register size operand (input).
2728	;
2729	%macro IEMIMPL_MEDIA_F2 1
2730	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2731	PROLOGUE_3_ARGS
2732	IEMIMPL_MMX_PROLOGUE
2733	; 64-bit variant: source into mm1, destination into mm0, operate, write mm0 back.
2734	movq mm1, [A2]
2735	movq mm0, [A1]
2736	%1 mm0, mm1
2737	movq [A1], mm0 ; [A1] is both input and output
2738
2739	IEMIMPL_MMX_EPILOGUE
2740	EPILOGUE_3_ARGS
2741	ENDPROC iemAImpl_ %+ %1 %+ _u64
2742
2743	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2744	PROLOGUE_3_ARGS
2745	IEMIMPL_SSE_PROLOGUE
2746	; 128-bit variant: unaligned moves, so no alignment is required of A1/A2.
2747	movdqu xmm1, [A2]
2748	movdqu xmm0, [A1]
2749	%1 xmm0, xmm1
2750	movdqu [A1], xmm0 ; [A1] is both input and output
2751
2752	IEMIMPL_SSE_EPILOGUE
2753	EPILOGUE_3_ARGS
2754	ENDPROC iemAImpl_ %+ %1 %+ _u128
2755	%endmacro
2756
2757	IEMIMPL_MEDIA_F2 pxor
2758	IEMIMPL_MEDIA_F2 pcmpeqb
2759	IEMIMPL_MEDIA_F2 pcmpeqw
2760	IEMIMPL_MEDIA_F2 pcmpeqd
2761
2762
2763	;;
2764	; Media instruction taking a full sized destination and only the lower half of the source register.
2765	; The source is fetched with a half width load: 32 bits for the MMX flavour, 64 bits for the SSE flavour.
2766	; @param 1 The instruction
2767	; @param 2 1 if MMX is included, 0 if not.
2768	;
2769	; @param A0 FPU context (fxsave); not referenced by the code below.
2770	; @param A1 Pointer to the first full sized media register operand (input/output).
2771	; @param A2 Pointer to the second half sized media register operand (input).
2772	;
2773	%macro IEMIMPL_MEDIA_F1L1 2
2774	%if %2 != 0
2775	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2776	PROLOGUE_3_ARGS
2777	IEMIMPL_MMX_PROLOGUE
2778	; MMX flavour: only 32 bits are read from A2.
2779	movd mm1, [A2]
2780	movq mm0, [A1]
2781	%1 mm0, mm1
2782	movq [A1], mm0 ; [A1] is both input and output
2783
2784	IEMIMPL_MMX_EPILOGUE
2785	EPILOGUE_3_ARGS
2786	ENDPROC iemAImpl_ %+ %1 %+ _u64
2787	%endif
2788
2789	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2790	PROLOGUE_3_ARGS
2791	IEMIMPL_SSE_PROLOGUE
2792	; SSE flavour: only 64 bits are read from A2.
2793	movq xmm1, [A2]
2794	movdqu xmm0, [A1]
2795	%1 xmm0, xmm1
2796	movdqu [A1], xmm0 ; [A1] is both input and output
2797
2798	IEMIMPL_SSE_EPILOGUE
2799	EPILOGUE_3_ARGS
2800	ENDPROC iemAImpl_ %+ %1 %+ _u128
2801	%endmacro
2802
2803	IEMIMPL_MEDIA_F1L1 punpcklbw, 1
2804	IEMIMPL_MEDIA_F1L1 punpcklwd, 1
2805	IEMIMPL_MEDIA_F1L1 punpckldq, 1
2806	IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2807
2808
2809	;;
2810	; Media instruction working on one full sized and one half sized register (high half).
2811	; The source must be loaded at full width, because the instruction consumes its upper half.
2812	; @param 1 The instruction
2813	; @param 2 1 if MMX is included, 0 if not.
2814	;
2815	; @param A0 FPU context (fxsave); not referenced by the code below.
2816	; @param A1 Pointer to the first full sized media register operand (input/output).
2817	; @param A2 Pointer to the second full sized media register operand, where we
2818	; will only use the upper half (input).
2819	;
2820	%macro IEMIMPL_MEDIA_F1H1 2
2821	%if %2 != 0
2822	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2823	PROLOGUE_3_ARGS
2824	IEMIMPL_MMX_PROLOGUE
2825
2826	movq mm0, [A1] ; destination, all 64 bits
2827	movq mm1, [A2] ; source, all 64 bits (the instruction uses the upper 32)
2828	%1 mm0, mm1
2829	movq [A1], mm0 ; [A1] is both input and output
2830
2831	IEMIMPL_MMX_EPILOGUE
2832	EPILOGUE_3_ARGS
2833	ENDPROC iemAImpl_ %+ %1 %+ _u64
2834	%endif
2835
2836	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2837	PROLOGUE_3_ARGS
2838	IEMIMPL_SSE_PROLOGUE
2839
2840	movdqu xmm0, [A1] ; destination, all 128 bits
2841	movdqu xmm1, [A2] ; source, all 128 bits (the instruction uses the upper 64)
2842	%1 xmm0, xmm1
2843	movdqu [A1], xmm0 ; [A1] is both input and output
2844
2845	IEMIMPL_SSE_EPILOGUE
2846	EPILOGUE_3_ARGS
2847	ENDPROC iemAImpl_ %+ %1 %+ _u128
2848	%endmacro
2849	; These must use the F1H1 template: the F1L1 one loads only the low half of [A2], which leaves the high half that punpckh* reads as zero.
2850	IEMIMPL_MEDIA_F1H1 punpckhbw, 1
2851	IEMIMPL_MEDIA_F1H1 punpckhwd, 1
2852	IEMIMPL_MEDIA_F1H1 punpckhdq, 1
2853	IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2854
2855
2856	;
2857	; Shufflers with evil 8-bit immediates.
2858	;
2859
2860	BEGINPROC_FASTCALL iemAImpl_pshufw, 16
2861	PROLOGUE_4_ARGS
2862	IEMIMPL_MMX_PROLOGUE
2863	; The shuffle immediate cannot come from a register, so A3 selects one of 256 pre-assembled 'pshufw + ret' stubs via a computed call.
2864	movq mm0, [A1] ; destination operand
2865	movq mm1, [A2] ; source operand
2866	lea T0, [A3 + A3*4] ; sizeof(pshufw+ret) == 5
2867	lea T1, [.imm0 xWrtRIP] ; address of the first stub
2868	lea T1, [T1 + T0] ; address of stub number A3; NOTE(review): assumes A3 holds the zero-extended 8-bit immediate - confirm with callers
2869	call T1 ; the stub shuffles mm1 into mm0 and returns here
2870	movq [A1], mm0 ; write the shuffled value back
2871
2872	IEMIMPL_MMX_EPILOGUE
2873	EPILOGUE_4_ARGS
2874	%assign bImm 0
2875	%rep 256
2876	.imm %+ bImm:
2877	pshufw mm0, mm1, bImm
2878	ret
2879	%assign bImm bImm + 1
2880	%endrep
2881	.immEnd: ; 256*5 == 0x500
2882	dw 0xfaff + (.immEnd - .imm0) ; will cause warning if entries are too big.
2883	dw 0x104ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
2884	ENDPROC iemAImpl_pshufw
2885
2886
2887	%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
2888	BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
2889	PROLOGUE_4_ARGS
2890	IEMIMPL_SSE_PROLOGUE
2891	; Same scheme as iemAImpl_pshufw: A3 selects one of 256 pre-assembled '<instr> + ret' stubs, here 6 bytes each.
2892	movdqu xmm0, [A1] ; destination operand
2893	movdqu xmm1, [A2] ; source operand
2894	lea T1, [.imm0 xWrtRIP] ; address of the first stub
2895	lea T0, [A3 + A3*2] ; sizeof(pshufXX+ret) == 6: (A3 * 3) * 2
2896	lea T1, [T1 + T0*2] ; address of stub number A3; NOTE(review): assumes A3 holds the zero-extended 8-bit immediate - confirm with callers
2897	call T1 ; the stub shuffles xmm1 into xmm0 and returns here
2898	movdqu [A1], xmm0 ; write the shuffled value back
2899
2900	IEMIMPL_SSE_EPILOGUE
2901	EPILOGUE_4_ARGS
2902	%assign bImm 0
2903	%rep 256
2904	.imm %+ bImm:
2905	%1 xmm0, xmm1, bImm
2906	ret
2907	%assign bImm bImm + 1
2908	%endrep
2909	.immEnd: ; 256*6 == 0x600
2910	dw 0xf9ff + (.immEnd - .imm0) ; will cause warning if entries are too big.
2911	dw 0x105ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
2912	ENDPROC iemAImpl_ %+ %1
2913	%endmacro
2914
2915	IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
2916	IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
2917	IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
2918
2919
2920	;
2921	; Move byte mask: A1 = destination (written as a full 64-bit value), A2 = media register source.
2922	;
2923
2924	BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
2925	PROLOGUE_3_ARGS
2926	IEMIMPL_MMX_PROLOGUE
2927	; NOTE(review): the load from [A1] looks redundant, since pmovmskb overwrites T0 - confirm before removing.
2928	movq mm1, [A2]
2929	mov T0, [A1]
2930	pmovmskb T0, mm1
2931	mov [A1], T0 ; native register width store
2932	%ifdef RT_ARCH_X86
2933	mov dword [A1 + 4], 0 ; 32-bit hosts: clear the upper half of the 64-bit destination by hand
2934	%endif
2935	IEMIMPL_MMX_EPILOGUE
2936	EPILOGUE_3_ARGS
2937	ENDPROC iemAImpl_pmovmskb_u64
2938
2939	BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
2940	PROLOGUE_3_ARGS
2941	IEMIMPL_SSE_PROLOGUE
2942	; SSE flavour of the byte mask move. NOTE(review): the load from [A1] looks redundant, since pmovmskb overwrites T0 - confirm before removing.
2943	movdqu xmm1, [A2]
2944	mov T0, [A1]
2945	pmovmskb T0, xmm1
2946	mov [A1], T0 ; native register width store
2947	%ifdef RT_ARCH_X86
2948	mov dword [A1 + 4], 0 ; 32-bit hosts: clear the upper half of the 64-bit destination by hand
2949	%endif
2950	IEMIMPL_SSE_EPILOGUE
2951	EPILOGUE_3_ARGS
2952	ENDPROC iemAImpl_pmovmskb_u128
2953

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 62478

Download in other formats: