IEMAllAImpl.asm@ 41829

Last change on this file since 41829 was 40523, checked in by vboxsync, 13 years ago
Fixes argument counting messup in various fist and fst assembly implementations.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 61.8 KB

Line
1	; $Id: IEMAllAImpl.asm 40523 2012-03-18 18:14:24Z vboxsync $
2	;; @file
3	; IEM - Instruction Implementation in Assembly.
4	;
5
6	; Copyright (C) 2011-2012 Oracle Corporation
7	;
8	; This file is part of VirtualBox Open Source Edition (OSE), as
9	; available from http://www.virtualbox.org. This file is free software;
10	; you can redistribute it and/or modify it under the terms of the GNU
11	; General Public License (GPL) as published by the Free Software
12	; Foundation, in version 2 as it comes in the "COPYING" file of the
13	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15	;
16
17
18	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19	; Header Files ;
20	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21	%include "VBox/asmdefs.mac"
22	%include "VBox/err.mac"
23	%include "iprt/x86.mac"
24
25
26	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27	; Defined Constants And Macros ;
28	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;
; RET XX / RET wrapper for fastcall.
;
; Emits 'ret %1' only when both RT_ARCH_X86 and RT_OS_WINDOWS are defined;
; every other configuration gets a plain 'ret'.
;
; @param        1       The number of stack bytes to pop on return.
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %else
  %ifndef RT_OS_WINDOWS
        ret
  %else
        ret     %1
  %endif
 %endif
%endmacro
44
;;
; NAME for fastcall functions.
;
; With both RT_ARCH_X86 and RT_OS_WINDOWS defined the symbol is pasted
; together as <prefix><name>@<cbArgs>.  In every other configuration it is
; simply NAME(<name>) and the other two arguments are ignored.
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;        escaping (or whatever the dollar is good for here).  Thus the ugly
;        prefix argument.
;
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %else
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Dollar) NAME(a_Name)
 %endif
%else
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Dollar) NAME(a_Name)
%endif
59
60	;;
61	; BEGINPROC for fastcall functions.
62	;
; Emits the export directive where the object format asks for one, declares
; the symbol global (except for flat binary output) and then defines the
; label that starts the function.
;
63	; @param 1 The function name (C).
64	; @param 2 The argument size on x86.
65	;
66	%macro BEGINPROC_FASTCALL 2
67	%ifdef ASM_FORMAT_PE ; PE: export the C name as an alias of the fastcall symbol.
68	export %1=NAME_FASTCALL(%1,%2,$@)
69	%endif
70	%ifdef __NASM__ ; The OMF export below is only emitted when __NASM__ is defined.
71	%ifdef ASM_FORMAT_OMF
72	export NAME(%1) NAME_FASTCALL(%1,%2,$@)
73	%endif
74	%endif
75	%ifndef ASM_FORMAT_BIN ; No 'global' declaration for flat binary output.
76	global NAME_FASTCALL(%1,%2,$@)
77	%endif
78	NAME_FASTCALL(%1,%2,@): ; The label itself; note the plain '@' prefix here versus '$@' in the directives above.
79	%endmacro
80
81
82	;
83	; We employ some macro assembly here to hide the calling convention differences.
84	;
85	%ifdef RT_ARCH_AMD64
86	%macro PROLOGUE_1_ARGS 0
87	%endmacro
88	%macro EPILOGUE_1_ARGS 1
89	ret
90	%endmacro
91
92	%macro PROLOGUE_2_ARGS 0
93	%endmacro
94	%macro EPILOGUE_2_ARGS 1
95	ret
96	%endmacro
97
98	%macro PROLOGUE_3_ARGS 0
99	%endmacro
100	%macro EPILOGUE_3_ARGS 1
101	ret
102	%endmacro
103
104	%macro PROLOGUE_4_ARGS 0
105	%endmacro
106	%macro EPILOGUE_4_ARGS 1
107	ret
108	%endmacro
109
110	%ifdef ASM_CALL64_GCC
111	%define A0 rdi
112	%define A0_32 edi
113	%define A0_16 di
114	%define A0_8 dil
115
116	%define A1 rsi
117	%define A1_32 esi
118	%define A1_16 si
119	%define A1_8 sil
120
121	%define A2 rdx
122	%define A2_32 edx
123	%define A2_16 dx
124	%define A2_8 dl
125
126	%define A3 rcx
127	%define A3_32 ecx
128	%define A3_16 cx
129	%endif
130
131	%ifdef ASM_CALL64_MSC
132	%define A0 rcx
133	%define A0_32 ecx
134	%define A0_16 cx
135	%define A0_8 cl
136
137	%define A1 rdx
138	%define A1_32 edx
139	%define A1_16 dx
140	%define A1_8 dl
141
142	%define A2 r8
143	%define A2_32 r8d
144	%define A2_16 r8w
145	%define A2_8 r8b
146
147	%define A3 r9
148	%define A3_32 r9d
149	%define A3_16 r9w
150	%endif
151
152	%define T0 rax
153	%define T0_32 eax
154	%define T0_16 ax
155	%define T0_8 al
156
157	%define T1 r11
158	%define T1_32 r11d
159	%define T1_16 r11w
160	%define T1_8 r11b
161
162	%else
163	; x86
; 1 argument: it arrives in a register, so only the T1 scratch register
; (edi on x86) has to be preserved.
164	%macro PROLOGUE_1_ARGS 0
165	push edi ; T1 is edi on x86; IEM_SAVE_FLAGS clobbers T1.
166	%endmacro
; @param 1 The number of stack argument bytes to pop on return.
167	%macro EPILOGUE_1_ARGS 1
168	pop edi
169	ret %1
170	%endmacro
171
; 2 arguments: same register save as the 1 argument variant.
172	%macro PROLOGUE_2_ARGS 0
173	push edi ; T1 is edi on x86; IEM_SAVE_FLAGS clobbers T1.
174	%endmacro
; @param 1 The number of stack argument bytes to pop on return.
175	%macro EPILOGUE_2_ARGS 1
176	pop edi
177	ret %1
178	%endmacro
179
; 3 arguments: the third one is fetched from the stack into ebx (A2).
180	%macro PROLOGUE_3_ARGS 0
181	push ebx ; ebx is used as A2 below, so save the caller's value.
182	mov ebx, [esp + 4 + 4] ; skip the saved ebx (4) and the return address (4) to reach the first stack argument.
183	push edi ; T1 is edi on x86.
184	%endmacro
; @param 1 The number of stack argument bytes to pop on return.
185	%macro EPILOGUE_3_ARGS 1
186	pop edi ; restore in the reverse order of PROLOGUE_3_ARGS.
187	pop ebx
188	ret %1
189	%endmacro
190
; 4 arguments: the third and fourth are fetched from the stack into ebx (A2)
; and esi (A3).
191	%macro PROLOGUE_4_ARGS 0
192	push ebx ; A2
193	push edi ; T1
194	push esi ; A3
195	mov ebx, [esp + 12 + 4 + 0] ; 12 = the three pushes above, 4 = return address; first stack argument.
196	mov esi, [esp + 12 + 4 + 4] ; second stack argument.
197	%endmacro
; @param 1 The number of stack argument bytes to pop on return.
198	%macro EPILOGUE_4_ARGS 1
199	pop esi ; restore in the reverse order of PROLOGUE_4_ARGS.
200	pop edi
201	pop ebx
202	ret %1
203	%endmacro
204
205	%define A0 ecx
206	%define A0_32 ecx
207	%define A0_16 cx
208	%define A0_8 cl
209
210	%define A1 edx
211	%define A1_32 edx
212	%define A1_16 dx
213	%define A1_8 dl
214
215	%define A2 ebx
216	%define A2_32 ebx
217	%define A2_16 bx
218	%define A2_8 bl
219
220	%define A3 esi
221	%define A3_32 esi
222	%define A3_16 si
223
224	%define T0 eax
225	%define T0_32 eax
226	%define T0_16 ax
227	%define T0_8 al
228
229	%define T1 edi
230	%define T1_32 edi
231	%define T1_16 di
232	%endif
233
234
235	;;
236	; Load the modified (%2) and undefined (%3) flags from [%1] into the host EFLAGS.
237	;
238	; @remarks Clobbers T0, stack. Changes EFLAGS.
239	; @remarks The load is unconditional for now; the '%if (%3) != 0' guard below is commented out.
240	; @param 1 The parameter (A0..A3) pointing to the eflags.
241	; @param 2 The set of modified flags.
242	; @param 3 The set of undefined flags.
243	;
244	%macro IEM_MAYBE_LOAD_FLAGS 3
245	;%if (%3) != 0
246	pushf ; store current flags
247	mov T0_32, [%1] ; load the guest flags
248	and dword [xSP], ~(%2 \| %3) ; mask out the modified and undefined flags
249	and T0_32, (%2 \| %3) ; select the modified and undefined flags.
250	or [xSP], T0 ; merge guest flags with host flags.
251	popf ; load the mixed flags.
252	;%endif
253	%endmacro
254
;;
; Merges the host EFLAGS left behind by the emulated instruction back into
; the guest EFLAGS variable.
;
; Only the bits in (%2 | %3) are taken from the host; all other bits of the
; guest value are kept.  Expands to nothing when both masks are zero.
;
; @remarks      Clobbers T0, T1, stack.
; @param        1       The register pointing to the EFLAGS.
; @param        2       The mask of modified flags to save.
; @param        3       The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf                           ; snapshot the host flags before the ANDs below alter them
        mov     T0_32, [%1]             ; T0 = current guest flags (mov leaves EFLAGS alone)
        pop     T1                      ; T1 = host flags snapshot
        and     T1_32, (%2 | %3)        ; keep only the modified and undefined flags of the host
        and     T0_32, ~(%2 | %3)       ; drop those same flags from the guest value
        or      T0_32, T1_32            ; combine the two
        mov     [%1], T0_32             ; write the guest flags back
 %endif
%endmacro
274
275
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (stubbed with int3 for now).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8                       ; same stack clean-up as the _u64_locked stub below (was a bare 'ret')
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro
374
; Instantiations.  Every line passes exactly the four arguments the macro
; declares (no trailing comma, which would add an empty fifth argument).
;              instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
385
386
;;
; Macro for implementing a bit operator (bt, btc, bts, btr).
;
; Generates the 16, 32 and 64 bit workers and, on request, their locked
; twins.  On 32-bit hosts the 64-bit workers are int3 stubs until somebody
; hand codes them.
;
; Worker arguments: A0 = pointer to the destination memory operand,
; A1 = the source register operand, A2 = pointer to eflags.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; 32-bit host: placeholder only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; 32-bit host: placeholder only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro

;              instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
473
;;
; Macro for implementing a bit search operator (bsf, bsr).
;
; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
; system where the 64-bit accesses requires hand coding.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; When the source is zero the instruction sets ZF and yields no result.  T0
; then still holds the flag bits left there by IEM_MAYBE_LOAD_FLAGS, so the
; destination is deliberately left untouched in that case.
;
; @remarks      This is the three parameter overload of IEMIMPL_BIT_OP; the
;               four parameter one implements bt/btc/bts/btr.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; source was zero: nothing to store (jz/mov keep EFLAGS intact)
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; source was zero: nothing to store
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; source was zero: nothing to store
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64
%endmacro
523	IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
524	IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
525
526
;
; IMUL is also a similar but yet different case (no lock, no mem dst).
; The rDX:rAX variant of imul is handled together with mul further down.
;
; Arguments: A0 = pointer to the first factor, which also receives the
; product; A1 = the second factor; A2 = pointer to eflags.
;
%define IEM_IMUL_TWO_EFL_MODIFIED  (X86_EFL_OF | X86_EFL_CF)
%define IEM_IMUL_TWO_EFL_UNDEFINED (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)

BEGINCODE
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        imul    A1_16, word [A0]
        mov     [A0], A1_16             ; the product goes back to where the first factor came from
        IEM_SAVE_FLAGS       A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_imul_two_u16

BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        imul    A1_32, dword [A0]
        mov     [A0], A1_32
        IEM_SAVE_FLAGS       A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_imul_two_u32

BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
%ifdef RT_ARCH_AMD64
        IEM_MAYBE_LOAD_FLAGS A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
        imul    A1, qword [A0]
        mov     [A0], A1
        IEM_SAVE_FLAGS       A2, IEM_IMUL_TWO_EFL_MODIFIED, IEM_IMUL_TWO_EFL_UNDEFINED
%else
        int3 ;; @todo implement me
%endif
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_imul_two_u64

%undef IEM_IMUL_TWO_EFL_MODIFIED
%undef IEM_IMUL_TWO_EFL_UNDEFINED
562
563
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Arguments: A0 = pointer to the memory operand, A1 = pointer to the
; register operand.  The two values are swapped via T0.  All return void.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; T0 = register value
        xchg    [A0], T0_8              ; memory <- register value, T0 <- old memory value
        mov     [A1], T0_8              ; register <- old memory value
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_xchg_u8

BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        xchg    [A0], T0_16
        mov     [A1], T0_16
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_xchg_u16

BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        xchg    [A0], T0_32
        mov     [A1], T0_32
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_xchg_u32

BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
%ifdef RT_ARCH_AMD64
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        xchg    [A0], T0
        mov     [A1], T0
        EPILOGUE_2_ARGS 0
%else
        int3                            ; not implemented on 32-bit hosts
        ret     0
%endif
ENDPROC iemAImpl_xchg_u64
607
608
;
; XADD for memory operands, plain and locked.
;
; Arguments: A0 = pointer to the memory/register destination, A1 = pointer
; to the register operand, A2 = pointer to eflags.  The register value is
; staged in T0, exchanged-and-added with [A0], and the value XADD hands back
; is stored to [A1].  All return void.
;
%define IEM_XADD_EFL_MODIFIED (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)

BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_8, [A1]
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u8

BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_16, [A1]
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u16

BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_32, [A1]
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u32

BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
%ifdef RT_ARCH_AMD64
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0, [A1]
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
%else
        int3                            ; not implemented on 32-bit hosts
        ret     4
%endif
ENDPROC iemAImpl_xadd_u64

BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u32_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
%ifdef RT_ARCH_AMD64
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS 4
%else
        int3                            ; not implemented on 32-bit hosts
        ret     4
%endif
ENDPROC iemAImpl_xadd_u64_locked

%undef IEM_XADD_EFL_MODIFIED
706
707
;;
; Macro for implementing a unary operator (inc, dec, neg, not).
;
; Generates plain and locked workers for the 8, 16, 32 and 64 bit operand
; sizes.  On 32-bit hosts the 64-bit workers are int3 stubs.
;
; Worker arguments: A0 = pointer to the destination memory operand,
; A1 = pointer to eflags.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %else
        ; 32-bit host: placeholders only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif

%endmacro

;                instr, modified-flags, undefined-flags.
IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
806
807
808
;;
; Macro for implementing a shift or rotate operation.
;
; Generates workers for the 8, 16, 32 and 64 bit operand sizes; on 32-bit
; hosts the 64-bit worker is an int3 stub.
;
; Worker arguments: A0 = pointer to the destination memory operand,
; A1 = the shift count, A2 = pointer to eflags.
;
; The count has to end up in CL.  With ASM_CALL64_GCC it is copied there from
; A1; otherwise A0 and A1 are swapped, which relies on A0 being xCX in those
; configurations (ASSUMPTION about the A0, A1 and A2 assignments).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      byte [A0], cl
 %else
        xchg    A1, A0                  ; count -> xCX, destination pointer -> A1
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; 32-bit host: placeholder only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        int3
        ret     4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

;                instr, modified-flags, undefined-flags.
IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
898
899
;;
; Macro for implementing a double precision shift operation (shld, shrd).
;
; Generates workers for the 16, 32 and 64 bit operand sizes; on 32-bit hosts
; the 64-bit worker is an int3 stub.
;
; Worker arguments: A0 = the destination operand (r/m), A1 = the source
; (reg), A2 = the shift count, A3 = pointer to the eflags variable/register.
;
; The count has to end up in CL.  With ASM_CALL64_GCC, A3 and A2 are swapped
; around the instruction and swapped back afterwards so A3 is the eflags
; pointer again.  Otherwise A0 and A2 are swapped and left that way, as
; neither is needed afterwards (ASSUMPTIONS about the A0, A1, A2 and A3
; assignments).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> A3 (xCX), eflags pointer parked in A2
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; undo the swap for IEM_SAVE_FLAGS
 %else
        xchg    A0, A2                  ; count -> A0 (xCX), destination pointer -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
  %else
        xchg    A0, A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; 32-bit host: placeholder only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

;                    instr, modified-flags, undefined-flags.
IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
973
974
;;
; Macro for implementing the multiplication operations (mul, imul).
;
; Generates workers for the 8, 16, 32 and 64 bit operand sizes; on 32-bit
; hosts the 64-bit worker is an int3 stub.
;
; The 8-bit worker only operates on AX, so it takes no DX pointer:
; A0 = pointer to AX, A1 = the operand, A2 = pointer to eflags.
; The other workers take A0 = pointer to rAX, A1 = pointer to rDX,
; A2 = the operand and A3 = pointer to eflags.
;
; All workers return 0 so the caller can share code with div/idiv.
;
; Outside ASM_CALL64_GCC the rDX pointer (A1) is parked in T1 first, because
; the multiplication overwrites xDX (ASSUMPTIONS about the A0, A1, A2, A3, T0
; and T1 assignments).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the full 16-bit product goes back to AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer safe across the multiplication
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; 32-bit host: placeholder only.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

;              instr, modified-flags, undefined-flags.
IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1075
1076
;;
; Macro for implementing division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit systems where the 64-bit access requires hand coding (stubbed for now).
;
; The 8-bit function only operates on AX, so it takes no DX pointer: a pointer
; to AX in A0, the divisor in A1 and a pointer to eflags in A2.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the divisor in
; A2 and a pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  Only a zero divisor is detected at present; quotient
; overflow is not checked yet (see the @todo markers below).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        test    A1_8, A1_8              ; 8-bit form: the divisor is in A1.
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; Success.

.return:
        EPILOGUE_3_ARGS 4

.div_zero:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        test    A2_16, A2_16            ; The divisor is in A2; A1 is the pointer to DX.
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Move the divisor out of A2 before DX is loaded.
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
%else
        mov     T1, A1                  ; Move the DX pointer out of A1 before DX is loaded.
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; Success.

.return:
        EPILOGUE_4_ARGS 8

.div_zero:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        test    A2_32, A2_32            ; The divisor is in A2; A1 is the pointer to EDX.
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Move the divisor out of A2 before EDX is loaded.
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
%else
        mov     T1, A1                  ; Move the EDX pointer out of A1 before EDX is loaded.
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; Success.

.return:
        EPILOGUE_4_ARGS 8

.div_zero:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        test    A2, A2                  ; The divisor is in A2; A1 is the pointer to RDX.
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
%ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Move the divisor out of A2 before RDX is loaded.
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
%else
        mov     T1, A1                  ; Move the RDX pointer out of A1 before RDX is loaded.
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
%endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; Success.

.return:
        EPILOGUE_4_ARGS 12

.div_zero:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
%else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        RET_FASTCALL 12                 ; Same stack argument size as EPILOGUE_4_ARGS 12 in the AMD64 variant.
ENDPROC iemAImpl_ %+ %1 %+ _u64
%endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
1235
1236
;
; BSWAP. No flag changes.
;
; Each function takes a single argument in A0: a pointer to the value that is
; byte swapped in place (input/output).  They all return void.
;

;;
; 16-bit variant: runs the 32-bit register BSWAP behind an operand size prefix.
;
; @param        A0      Pointer to the value (a full dword is read and written).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; Load 32 bits, just in case any of the upper bits are used.
        db      66h                     ; Operand size prefix applied to the BSWAP that follows.
        bswap   T0_32
        mov     [A0], T0_32             ; Store the whole dword back.
        EPILOGUE_1_ARGS 0
ENDPROC iemAImpl_bswap_u16
1251
;;
; 32-bit BSWAP of the value A0 points to (in place, no flag changes).
;
; @param        A0      Pointer to the 32-bit value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; Fetch the value ...
        bswap   T0_32                   ; ... reverse its byte order ...
        mov     [A0], T0_32             ; ... and store it back.
        EPILOGUE_1_ARGS 0
ENDPROC iemAImpl_bswap_u32
1259
;;
; 64-bit BSWAP of the value A0 points to (in place, no flag changes).
;
; @param        A0      Pointer to the 64-bit value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        ; Without 64-bit registers: byte swap each half and exchange the halves.
        mov     T1, [A0 + 4]            ; High dword.
        mov     T0, [A0]                ; Low dword.
        bswap   T1
        bswap   T0
        mov     [A0], T1                ; Swapped high dword becomes the low one.
        mov     [A0 + 4], T0            ; Swapped low dword becomes the high one.
%endif
        EPILOGUE_1_ARGS 0
ENDPROC iemAImpl_bswap_u64
1278
1279
;;
; Initialize the FPU for the actual instruction being emulated by merging parts
; of the guest's control word and status word into the current FPU environment.
;
; The environment is stored at [xSP], patched and loaded again:
;   - FCW is replaced by the guest FCW bits selected by X86_FCW_MASK_ALL,
;     X86_FCW_PC_MASK and X86_FCW_RC_MASK.
;   - FSW keeps only the current X86_FSW_TOP_MASK bits and gets the guest bits
;     selected by X86_FSW_C_MASK.
;
; @uses     The memory at [xSP] for the environment image; the users in this
;           file reserve 20h bytes for it.
;           NOTE(review): the image is addressed via X86FSTENV32P, presumably
;           the 28 byte protected mode layout - confirm the size requirement.
; @uses     T0 and T1 as scratch registers.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - guest C0, C1, C2 and C3 combined with the current TOP.
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T0, X86_FSW_TOP_MASK
        and     T1, X86_FSW_C_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1305
1306
;;
; Result of an FPU operation producing one 80-bit value and a status word.
; Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result  resb 10                 ; The 80-bit result (ten bytes).
    .FSW        resw 1                  ; The output FPU status word.
endstruc
1314
1315
;;
; Result of an FPU operation producing two 80-bit values and a status word.
; Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1 resb 10                 ; The first 80-bit result (ten bytes).
    .FSW        resw 1                  ; The output FPU status word.
    .r80Result2 resb 10                 ; The second 80-bit result (ten bytes).
endstruc
1324
1325
1326	;
1327	;---------------------- 16-bit signed integer operations ----------------------
1328	;
1329
1330
;;
; Converts a 16-bit signed integer to an 80-bit floating point value (FILD).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_3_ARGS 4               ; 12 argument bytes declared; same value as the other 3-argument procedures.
ENDPROC iemAImpl_fild_i16_to_r80
1354
1355
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fist_r80_to_i16
1379
1380
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit store: the destination is only a word wide.

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fistt_r80_to_i16
1405
1406
;;
; Template for FPU instructions combining ST0 (80-bit) with a 16-bit signed
; integer memory operand; generates iemAImpl_<insn>_r80_by_i16.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1443
1444
;;
; Template for FPU instructions working on ST0 (80-bit) and a 16-bit signed
; integer memory operand where only the resulting FSW is returned.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1]              ; The status word is the only output.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1475
1476
1477
1478	;
1479	;---------------------- 32-bit signed integer operations ----------------------
1480	;
1481
1482
;;
; Converts a 32-bit signed integer to an 80-bit floating point value (FILD).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_3_ARGS 4               ; 12 argument bytes declared; same value as the other 3-argument procedures.
ENDPROC iemAImpl_fild_i32_to_r80
1506
1507
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fist_r80_to_i32
1531
1532
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fistt_r80_to_i32
1557
1558
;;
; Template for FPU instructions combining ST0 (80-bit) with a 32-bit signed
; integer memory operand; generates iemAImpl_<insn>_r80_by_i32.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1595
1596
;;
; Template for FPU instructions working on ST0 (80-bit) and a 32-bit signed
; integer memory operand where only the resulting FSW is returned.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1]              ; The status word is the only output.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
1627
1628
1629
1630	;
1631	;---------------------- 64-bit signed integer operations ----------------------
1632	;
1633
1634
;;
; Converts a 64-bit signed integer to an 80-bit floating point value (FILD).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_3_ARGS 4               ; 12 argument bytes declared; same value as the other 3-argument procedures.
ENDPROC iemAImpl_fild_i64_to_r80
1658
1659
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fist_r80_to_i64
1683
1684
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fistt_r80_to_i64
1709
1710
1711
1712	;
1713	;---------------------- 32-bit floating point operations ----------------------
1714	;
1715
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_3_ARGS 4               ; 12 argument bytes declared; same value as the other 3-argument procedures.
ENDPROC iemAImpl_fld_r32_to_r80
1739
1740
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; No pop needed, the FPU is reinitialized below.

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fst_r80_to_r32
1764
1765
;;
; Template for FPU instructions combining ST0 (80-bit) with a 32-bit floating
; point memory operand; generates iemAImpl_<insn>_r80_by_r32.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
1802
1803
;;
; Template for FPU instructions working on ST0 (80-bit) and a 32-bit floating
; point memory operand where only the resulting FSW is returned.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1]              ; The status word is the only output.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
1834
1835
1836
1837	;
1838	;---------------------- 64-bit floating point operations ----------------------
1839	;
1840
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU, like the other loaders.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_3_ARGS 4               ; 12 argument bytes declared; same value as the other 3-argument procedures.
ENDPROC iemAImpl_fld_r64_to_r80
1863
1864
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; No pop needed, the FPU is reinitialized below.

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fst_r80_to_r64
1888
1889
;;
; Template for FPU instructions combining ST0 (80-bit) with a 64-bit floating
; point memory operand; generates iemAImpl_<insn>_r80_by_r64.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
1926
;;
; Template for FPU instructions working on ST0 (80-bit) and a 64-bit floating
; point memory operand where only the resulting FSW is returned.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The 80-bit operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1]              ; The status word is the only output.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
1957
1958
1959
1960	;
1961	;---------------------- 80-bit floating point operations ----------------------
1962	;
1963
;;
; Loads a 80-bit floating point register value from memory.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_3_ARGS 4               ; 12 argument bytes declared; same value as the other 3-argument procedures.
ENDPROC iemAImpl_fld_r80_from_r80
1987
1988
;;
; Store a 80-bit floating point register to memory
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]

        fnstsw  word  [A1]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_4_ARGS 8               ; 16 argument bytes declared; same value as the other 4-argument procedures.
ENDPROC iemAImpl_fst_r80_to_r80
2012
2013
;;
; Template for FPU instructions working on two 80-bit floating point values;
; generates iemAImpl_<insn>_r80_by_r80.
;
; @param    1       The instruction
; @param    2       The operand text to put after the instruction (may be empty).
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A3]              ; Loaded first, so it ends up in ST1.
        fld     tword [A2]              ; Loaded last, so it ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2054
2055
;;
; Template for FPU instructions working on two 80-bit floating point values,
; ST1 and ST0, storing the result in ST1 and popping the stack.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; Loaded first, so it ends up in ST1.
        fld     tword [A3]              ; Loaded last, so it ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2090
2091
;;
; Template for FPU instructions working on two 80-bit floating point values
; where only the resulting FSW is returned.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A3]              ; Loaded first, so it ends up in ST1.
        fld     tword [A2]              ; Loaded last, so it ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word  [A1]              ; The status word is the only output.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2124
2125
;;
; Template for FPU instructions working on two 80-bit floating point values,
; returning FSW and EFLAGS (eax).
;
; @param    1       The instruction
;
; Generated function:
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A3]              ; Loaded first, so it ends up in ST1.
        fld     tword [A2]              ; Loaded last, so it ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        fnstsw  word  [A1]
        pushf                           ; Grab the flags right away, before the
        pop     xAX                     ; stack adjustment below alters them.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2161
2162
;;
; Template for FPU instructions working on one 80-bit floating point value;
; generates iemAImpl_<insn>_r80.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
; NOTE(review): fyl2x operates on both ST1 and ST0, but this template only
; loads a single value.  It looks like it belongs with fpatan and fyl2xp1;
; left here because moving it would rename the generated function.
IEMIMPL_FPU_R80 fyl2x
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2200
2201
;;
; Template for FPU instructions working on one 80-bit floating point value
; where only the resulting FSW is returned.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1]              ; The status word is the only output.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2232
2233
2234
;;
; Template for FPU instructions loading a 80-bit floating point constant;
; generates iemAImpl_<insn>.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fninit                          ; Start from a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop any exception flags before storing the result.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its initialized state.
        add     xSP, 20h
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1                 ; Same name expression as on the BEGINPROC_FASTCALL line.
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2269
2270
;;
; Template for FPU instructions working on one 80-bit floating point value and
; outputting two; generates iemAImpl_<insn>_r80_r80.
;
; @param    1       The instruction
;
; Arguments of the generated function:
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        fninit                          ; Start from a freshly initialized FPU.
        sub     xSP, 20h                ; Scratch space for the FPU environment image.

        fld     tword [A2]              ; The operand goes into ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW]
        fnclex                          ; Drop any exception flags before each store.
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2]    ; The top of the stack is the second result.
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1]    ; The value below it is the first result.

        add     xSP, 20h
        fninit                          ; Leave the FPU in its initialized state.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
2305

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 41829

Download in other formats: