IEMAllAImpl.asm@ 40248

Last change on this file since 40248 was 40248, checked in by vboxsync, 13 years ago
IEM: fst[p], fist[p] and fisttp implementations and fixes.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 58.0 KB

Line
1	; $Id: IEMAllAImpl.asm 40248 2012-02-24 16:12:05Z vboxsync $
2	;; @file
3	; IEM - Instruction Implementation in Assembly.
4	;
5
6	; Copyright (C) 2011-2012 Oracle Corporation
7	;
8	; This file is part of VirtualBox Open Source Edition (OSE), as
9	; available from http://www.virtualbox.org. This file is free software;
10	; you can redistribute it and/or modify it under the terms of the GNU
11	; General Public License (GPL) as published by the Free Software
12	; Foundation, in version 2 as it comes in the "COPYING" file of the
13	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15	;
16
17
18	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19	; Header Files ;
20	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21	%include "VBox/asmdefs.mac"
22	%include "VBox/err.mac"
23	%include "iprt/x86.mac"
24
25
26	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27	; Defined Constants And Macros ;
28	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;
; RET XX / RET wrapper for fastcall.
;
; Emits 'ret %1' only when both RT_ARCH_X86 and RT_OS_WINDOWS are defined;
; every other configuration gets a plain 'ret'.
;
; @param    1   The number of bytes the 'ret %1' form removes from the stack.
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %else
  %ifdef RT_OS_WINDOWS
        ret     %1
  %else
        ret
  %endif
 %endif
%endmacro
44
;;
; NAME for fastcall functions.
;
; Expands to NAME(a_Name) in every configuration except RT_ARCH_X86 together
; with RT_OS_WINDOWS, where it expands to <a_Prefix><a_Name>@<a_cbArgs>.
;
; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;       escaping (or whatever the dollar is good for here).  Thus the ugly
;       prefix argument.
;
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %else
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
 %endif
%else
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%endif
59
;;
; BEGINPROC for fastcall functions.
;
; Emits the 'export' directive for PE output (and for OMF output when
; assembling with NASM), the 'global' directive for everything but flat
; binary output, and finally the function label itself.
;
; @param    1   The function name (C).
; @param    2   The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
  export %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
   export NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
  global NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
80
81
;
; We employ some macro assembly here to hide the calling convention differences.
;
; A0..A3 name the argument registers and T0/T1 the temporary registers; the
; _32/_16/_8 suffixes select the corresponding sub-registers.  Each function
; brackets its body with PROLOGUE_n_ARGS / EPILOGUE_n_ARGS, n being the
; number of arguments it takes.
;
%ifdef RT_ARCH_AMD64

 ;
 ; AMD64 register aliases.
 ;
 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

 ;
 ; AMD64 prologues are empty and the epilogues are a bare RET; the epilogue
 ; parameter is accepted but not used.
 ;
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 1
        ret
 %endmacro

%else
 ; x86

 ;
 ; x86 register aliases.  A2 and A3 are loaded from the stack by the
 ; PROLOGUE_3_ARGS / PROLOGUE_4_ARGS macros below.
 ;
 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di

 ;
 ; x86 prologues save every register that is used as T1, A2 or A3 and the
 ; matching epilogue restores them in reverse order before returning with
 ; 'ret %1'.
 ;
 %macro PROLOGUE_1_ARGS 0
        push    edi                     ; T1
 %endmacro
 %macro EPILOGUE_1_ARGS 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi                     ; T1
 %endmacro
 %macro EPILOGUE_2_ARGS 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx                     ; A2
        mov     ebx, [esp + 4 + 4]      ; skip the saved ebx and the return address
        push    edi                     ; T1
 %endmacro
 %macro EPILOGUE_3_ARGS 1
        pop     edi
        pop     ebx
        ret     %1
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx                     ; A2
        push    edi                     ; T1
        push    esi                     ; A3
        mov     ebx, [esp + 12 + 4 + 0] ; skip the three saved registers and the return address
        mov     esi, [esp + 12 + 4 + 4]
 %endmacro
 %macro EPILOGUE_4_ARGS 1
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
%endif
233
234
;;
; Load the modified and undefined guest flags from [%1] into the host EFLAGS.
;
; The host flags are pushed, the bits selected by (%2 | %3) are replaced with
; the corresponding bits of the dword at [%1], and the merged value is popped
; back into EFLAGS.  When both masks are zero the sequence could not change
; any flag, so nothing is emitted (same guard as IEM_SAVE_FLAGS uses).
;
; @remarks  Clobbers T0, stack.  Changes EFLAGS.
; @param    1   The parameter (A0..A3) pointing to the eflags.
; @param    2   The set of modified flags.
; @param    3   The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 %if (%2 | %3) != 0
        pushf                               ; store current flags
        mov     T0_32, [%1]                 ; load the guest flags
        and     dword [xSP], ~(%2 | %3)     ; mask out the modified and undefined flags
        and     T0_32, (%2 | %3)            ; select the modified and undefined flags.
        or      [xSP], T0                   ; merge guest flags with host flags.
        popf                                ; load the mixed flags.
 %endif
%endmacro
254
;;
; Write the modified and undefined host flags back to the guest eflags.
;
; Only the bits selected by (%2 | %3) are taken from the host EFLAGS; all
; other bits of the dword at [%1] are kept.  Nothing is emitted when both
; masks are zero.
;
; @remarks  Clobbers T0, T1, stack.
; @param    1   The register pointing to the EFLAGS.
; @param    2   The mask of modified flags to save.
; @param    3   The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        mov     T0_32, [%1]                 ; guest flags (mov leaves EFLAGS untouched)
        pushf
        pop     T1                          ; host flags as left by the instruction
        and     T1_32, (%2 | %3)            ; select the modified and undefined flags.
        and     T0_32, ~(%2 | %3)           ; clear the modified & undefined flags.
        or      T0_32, T1_32                ; combine the flags.
        mov     [%1], T0_32                 ; save the flags.
 %endif
%endmacro
274
275
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (they are int3 stubs for now).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; Each function loads the guest flags, executes the instruction on [A0] and
; writes the resulting flags back through A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8                       ; same stack clean-up as the other 64-bit stubs
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro

;            instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
385
386
;;
; Macro for implementing a bit operator.
;
; This will generate code for the 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (int3 stubs for now).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
473
;;
; Macro for implementing a bit search operator.
;
; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
; system where the 64-bit accesses requires hand coding (int3 stubs for now).
;
; All the functions takes a pointer to the destination operand in A0, the
; source operand (by value) in A1 and a pointer to eflags in A2.
;
; The instruction is executed with T0 as its destination and the result is
; only copied to [A0] when the instruction cleared ZF.  When ZF is set (the
; source was zero) T0 does not hold a defined result, so the destination is
; left untouched rather than being overwritten with whatever T0 contained.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; zero source: no result to store (jz/mov keep EFLAGS)
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; zero source: no result to store
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; zero source: no result to store
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64
%endmacro
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
525
526
;
; IMUL is also a similar but yet different case (no lock, no mem dst).
; The rDX:rAX variant of imul is handled together with mul further down.
;
; Each function takes a pointer to the first operand / result in A0, the
; second operand by value in A1 and a pointer to eflags in A2.  The product
; is computed in the A1 register and then stored to [A0].
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]
        mov     [A0], A1_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_imul_two_u16

BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]
        mov     [A0], A1_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_imul_two_u32

BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
%ifdef RT_ARCH_AMD64
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]
        mov     [A0], A1
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
%else
        int3 ;; @todo implement me
%endif
        EPILOGUE_3_ARGS 8
ENDPROC iemAImpl_imul_two_u64
562
563
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Each function takes two arguments, first the pointer to the memory,
; then the pointer to the register.  They all return void.
;
; The register value is fetched into T0, swapped with the memory operand and
; the previous memory content is then written back to the register.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; T0 = register value
        xchg    [A0], T0_8              ; T0 = old memory value
        mov     [A1], T0_8
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_xchg_u8

BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        xchg    [A0], T0_16
        mov     [A1], T0_16
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_xchg_u16

BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        xchg    [A0], T0_32
        mov     [A1], T0_32
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_xchg_u32

BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
%ifdef RT_ARCH_AMD64
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        xchg    [A0], T0
        mov     [A1], T0
        EPILOGUE_2_ARGS 0
%else
        ; Not implemented on 32-bit hosts yet.
        int3
        ret     0
%endif
ENDPROC iemAImpl_xchg_u64
607
608
;
; XADD for memory operands.
;
; Each function takes three arguments, first the pointer to the
; memory/register, then the pointer to the register, and finally a pointer to
; eflags.  They all return void.
;
; The register value is fetched into T0, XADD leaves the previous content of
; [A0] in T0, and that value is then stored back through A1.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u8

BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u16

BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u32

BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
%ifdef RT_ARCH_AMD64
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
%else
        ; Not implemented on 32-bit hosts yet.
        int3
        ret     4
%endif
ENDPROC iemAImpl_xadd_u64

;
; The explicitly locked variants.
;
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_xadd_u32_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
%ifdef RT_ARCH_AMD64
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS 4
%else
        ; Not implemented on 32-bit hosts yet.
        int3
        ret     4
%endif
ENDPROC iemAImpl_xadd_u64_locked
706
707
;;
; Macro for implementing a unary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (int3 stubs for now).
;
; All the functions takes a pointer to the destination memory operand in A0
; and a pointer to eflags in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS 0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %else
        ; stub them for now.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif

%endmacro

;              instr, modified-flags, undefined-flags.
IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
806
807
808
;;
; Macro for implementing a shift operation.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding (int3 stub
; for now).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the shift count in A1 and a pointer to eflags in A2.
;
; The instruction takes its count from CL.  With ASM_CALL64_GCC the count is
; copied from A1 into cl; otherwise A0 is xCX, so A0 and A1 are swapped,
; which leaves the count in cl and the destination pointer in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      byte [A0], cl
 %else
        xchg    A1, A0                  ; cl = count, A1 = destination pointer
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0                  ; cl = count, A1 = destination pointer
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0                  ; cl = count, A1 = destination pointer
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0                  ; cl = count, A1 = destination pointer
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        int3
        ret     4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

;              instr, modified-flags, undefined-flags.
IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
898
899
;;
; Macro for implementing a double precision shift operation.
;
; This will generate code for the 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding (int3 stub
; for now).
;
; The functions takes the destination operand (r/m) in A0, the source (reg) in
; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
;
; The instruction takes its count from CL.  With ASM_CALL64_GCC A3 is rcx,
; so A3 and A2 are swapped around the instruction and swapped back before
; the flags are saved through A3.  Otherwise A0 is xCX, so A0 and A2 are
; swapped and the destination is addressed through A2.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; cl = count
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; A3 = eflags pointer again
 %else
        xchg    A0, A2                  ; cl = count, A2 = destination pointer
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; cl = count
        %1      [A0], A1_32, cl
        xchg    A3, A2                  ; A3 = eflags pointer again
 %else
        xchg    A0, A2                  ; cl = count, A2 = destination pointer
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; cl = count
        %1      [A0], A1, cl
        xchg    A3, A2                  ; A3 = eflags pointer again
  %else
        xchg    A0, A2                  ; cl = count, A2 = destination pointer
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

;                  instr, modified-flags, undefined-flags.
IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
973
974
;;
; Macro for implementing a multiplication operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding (int3 stub
; for now).
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the operand and A2 points to eflags.  The other functions
; takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a pointer to
; eflags in A3.
;
; The functions all return 0 so the caller can be used for div/idiv as well as
; for the mul/imul implementation.
;
; Unless ASM_CALL64_GCC is defined, A1 is xDX and gets overwritten by the
; instruction, so the rDX pointer is copied to T1 first.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the full 16-bit result goes back to AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer, the instruction writes xDX
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; keep the rDX pointer, the instruction writes xDX
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; keep the rDX pointer, the instruction writes rdx
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

;            instr, modified-flags, undefined-flags.
IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1075
1076
;;
; Macro for implementing division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit systems where the 64-bit accesses require hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 is a
; pointer to AX, A1 the divisor and A2 a pointer to eflags.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the divisor
; in A2 and a pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  Only a zero divisor is detected at present; quotient
; overflow is not checked yet (see the todo markers in the code).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments: the argument
; that is presumably held in xDX (A2 with ASM_CALL64_GCC, A1 otherwise) is
; copied to T1 before xDX is loaded with the high half of the dividend.
;
%macro IEMIMPL_DIV_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; Refuse a zero divisor up front (the divisor is A1 in this variant).
        test    A1_8, A1_8
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS A2, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_3_ARGS 4

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; Refuse a zero divisor up front.  The divisor is A2; A1 is the
        ; pointer to DX and must not be the thing tested here.
        test    A2_16, A2_16
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                          ; park the divisor before DX is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                          ; park the DX pointer before DX is loaded
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS 8

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; Refuse a zero divisor up front.  The divisor is A2; A1 is the
        ; pointer to EDX and must not be the thing tested here.
        test    A2_32, A2_32
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                          ; park the divisor before EDX is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                          ; park the EDX pointer before EDX is loaded
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS 8

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; Refuse a zero divisor up front.  The divisor is A2; A1 is the
        ; pointer to RDX and must not be the thing tested here.
        test    A2, A2
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                          ; park the divisor before RDX is loaded
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
 %else
        mov     T1, A1                          ; park the RDX pointer before RDX is loaded
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
 %endif
        IEM_SAVE_FLAGS A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS 12

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
%else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12                              ; same stack cleanup as the multiply stub
ENDPROC iemAImpl_ %+ %1 %+ _u64
%endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
1235
1236
;;
; BSWAP.  No flag changes.
;
; Each of the iemAImpl_bswap_* functions takes one argument, a pointer to the
; value to byte swap (input/output), and returns void.
;
; 16-bit variant: a full dword is read from and written back to [A0], and the
; BSWAP on the 32-bit register is preceded by a hand-emitted 66h prefix byte.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS

        mov     T0_32, [A0]                     ; just in case any of the upper bits are used.
        db      66h                             ; prefix byte for the bswap that follows
        bswap   T0_32
        mov     [A0], T0_32

        EPILOGUE_1_ARGS 0
ENDPROC iemAImpl_bswap_u16
1251
;;
; BSWAP, 32-bit variant.  No flag changes.
;
; @param    A0      Pointer to the 32-bit value to byte swap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS

        mov     T0_32, [A0]
        bswap   T0_32
        mov     [A0], T0_32

        EPILOGUE_1_ARGS 0
ENDPROC iemAImpl_bswap_u32
1259
;;
; BSWAP, 64-bit variant.  No flag changes.
;
; On AMD64 this is a single 64-bit BSWAP.  Otherwise the two dwords are byte
; swapped individually and stored back in exchanged positions.
;
; @param    A0      Pointer to the 64-bit value to byte swap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        mov     T0, [A0]                        ; dword at offset 0
        mov     T1, [A0 + 4]                    ; dword at offset 4
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0                    ; swapped first dword goes to offset 4
        mov     [A0], T1                        ; swapped second dword goes to offset 0
%endif
        EPILOGUE_1_ARGS 0
ENDPROC iemAImpl_bswap_u64
1278
1279
;;
; Initialize the FPU for the actual instruction being emulated, this means
; loading parts of the guest's control word and status word.
;
; The current FPU environment is dumped to [xSP] with FNSTENV, the FCW and FSW
; fields of that image are patched, and the image is loaded back with FLDENV.
; TOP is taken from the dumped image, so values pushed before the macro is
; used stay where they are.
;
; @uses     The environment image at [xSP]: 28 bytes in the 32-bit protected
;           mode layout (the callers in this file reserve 20h bytes).
;           Clobbers T0 and T1.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        ; Guest condition code bits are merged with the TOP field of the image.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1305
1306
;;
; Layout of the single-value FPU result: a 10-byte (80-bit) value followed by
; the 16-bit status word.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result      resb 10
    .FSW            resb 2
endstruc
1314
1315
;;
; Layout of the two-value FPU result: first 10-byte (80-bit) value, the
; 16-bit status word, then the second 10-byte value.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1     resb 10
    .FSW            resb 2
    .r80Result2     resb 10
endstruc
1324
1325
1326	;
1327	;---------------------- 16-bit signed integer operations ----------------------
1328	;
1329
1330
;;
; Converts a 16-bit signed integer to an 80-bit floating point value (FILD).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        ; 12 bytes of arguments: same epilogue value as the other 12-byte,
        ; three-argument procedures in this file.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_fild_i16_to_r80
1354
1355
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fist_r80_to_i16
1379
1380
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]                       ; word sized store: the destination is 16 bits wide

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fistt_r80_to_i16
1405
1406
1407
1408	;
1409	;---------------------- 32-bit signed integer operations ----------------------
1410	;
1411
1412
;;
; Converts a 32-bit signed integer to an 80-bit floating point value (FILD).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        ; 12 bytes of arguments: same epilogue value as the other 12-byte,
        ; three-argument procedures in this file.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_fild_i32_to_r80
1436
1437
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fist_r80_to_i32
1461
1462
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation.
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fistt_r80_to_i32
1487
1488
;;
; Generates iemAImpl_<instr>_r80_by_i32: an FPU instruction combining an
; 80-bit value (loaded into ST0) with a 32-bit signed integer memory operand,
; returning the 80-bit result together with the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1525
1526
;;
; Generates iemAImpl_<instr>_r80_by_i32 for an FPU instruction that takes an
; 80-bit value (loaded into ST0) and a 32-bit signed integer memory operand
; and only reports the resulting status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
1557
1558
1559
1560	;
1561	;---------------------- 64-bit signed integer operations ----------------------
1562	;
1563
1564
;;
; Converts a 64-bit signed integer to an 80-bit floating point value (FILD).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        ; 12 bytes of arguments: same epilogue value as the other 12-byte,
        ; three-argument procedures in this file.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_fild_i64_to_r80
1588
1589
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fist_r80_to_i64
1613
1614
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation.
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fistt_r80_to_i64
1639
1640
1641
1642	;
1643	;---------------------- 32-bit floating point operations ----------------------
1644	;
1645
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        ; 12 bytes of arguments: same epilogue value as the other 12-byte,
        ; three-argument procedures in this file.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_fld_r32_to_r80
1669
1670
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fst_r80_to_r32
1694
1695
;;
; Generates iemAImpl_<instr>_r80_by_r32: an FPU instruction combining an
; 80-bit value (loaded into ST0) with a 32-bit floating point memory operand,
; returning the 80-bit result together with the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
1732
1733
;;
; Generates iemAImpl_<instr>_r80_by_r32 for an FPU instruction that takes an
; 80-bit value (loaded into ST0) and a 32-bit floating point memory operand
; and only reports the resulting status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
1764
1765
1766
1767	;
1768	;---------------------- 64-bit floating point operations ----------------------
1769	;
1770
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit                                  ; start from a clean FPU like the sibling loaders
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        ; 12 bytes of arguments: same epilogue value as the other 12-byte,
        ; three-argument procedures in this file.
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_fld_r64_to_r80
1793
1794
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; Four arguments are used, so the procedure is declared with 16 argument bytes
; and the four-argument prologue/epilogue, like the other procedures in this
; file that read A3.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_fst_r80_to_r64
1818
1819
;;
; Generates iemAImpl_<instr>_r80_by_r64: an FPU instruction combining an
; 80-bit value (loaded into ST0) with a 64-bit floating point memory operand,
; returning the 80-bit result together with the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
1856
;;
; Generates iemAImpl_<instr>_r80_by_r64 for an FPU instruction that takes an
; 80-bit value (loaded into ST0) and a 64-bit floating point memory operand
; and only reports the resulting status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
1887
1888
1889
1890	;
1891	;---------------------- 80-bit floating point operations ----------------------
1892	;
1893
;;
; Generates iemAImpl_<instr>_r80_by_r80: an FPU instruction working on two
; 80-bit floating point values, leaving its result in ST0.
;
; The second value is loaded first so that it ends up in ST1 and the first
; value in ST0.
;
; @param    1       The instruction
; @param    2       The operand list to put after the instruction (may be empty).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]                      ; becomes ST1 after the next load
        fld     tword [A2]                      ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
1934
1935
;;
; Generates iemAImpl_<instr>_r80_by_r80 for FPU instructions working on two
; 80-bit floating point values, ST1 and ST0, storing the result in ST1 and
; popping the stack.
;
; The first value is loaded first so that it ends up in ST1; after the
; instruction has popped, the result is stored from the top of the stack.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]                      ; becomes ST1 after the next load
        fld     tword [A3]                      ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
1970
1971
;;
; Generates iemAImpl_<instr>_r80_by_r80 for FPU instructions working on two
; 80-bit floating point values and only reporting the resulting status word.
;
; The second value is loaded first so that it ends up in ST1 and the first
; value in ST0; the instruction is issued as "<instr> st0, st1".
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A3]                      ; becomes ST1 after the next load
        fld     tword [A2]                      ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS 8
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2004
2005
;;
; Generates iemAImpl_<instr>_r80: an FPU instruction working on one 80-bit
; floating point value (loaded into ST0), returning the 80-bit result
; together with the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
; NOTE(review): FYL2X reads ST1 as well as ST0, but this wrapper only loads
; one value - confirm whether it belongs with the two-operand ST1/ST0 wrappers.
IEMIMPL_FPU_R80 fyl2x
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2043
2044
;;
; Generates iemAImpl_<instr>_r80 for FPU instructions that examine one 80-bit
; floating point value (loaded into ST0) and only report the resulting status
; word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2075
2076
2077
;;
; Generates iemAImpl_<instr>: an FPU instruction loading a 80-bit floating
; point constant, returned together with the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                                  ; drop pending exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_2_ARGS 0
        ; The name here has to match the BEGINPROC_FASTCALL line: no suffix.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2112
2113
;;
; Generates iemAImpl_<instr>_r80_r80: an FPU instruction working on one 80-bit
; floating point value and outputting two.
;
; After the instruction the top of the stack is stored as r80Result2 and the
; entry below it as r80Result1.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW]
        fnclex                                  ; drop pending exceptions before each store
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2]
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1]

        fninit                                  ; leave the FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS 4
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
2148

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 40248

Download in other formats: