VirtualBox

source: vbox/trunk/src/recompiler/tcg/i386/tcg-target.c@ 37696

Last change on this file since 37696 was 37696, checked in by vboxsync, 13 years ago

rem: fixed 64-bit host bustage.

/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};

static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
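
/* Worked example, added for exposition (the addresses are invented):
   tcg_out_jxx() below records an R_386_PC32 reloc against the 4-byte
   displacement field with addend -4.  If that field lives at 0x1001
   (opcode byte at 0x1000) and the label later resolves to 0x1080,
   patch_reloc() computes 0x1080 + (-4) - 0x1001 = 0x7b, which is
   label minus end-of-instruction (0x1005), i.e. exactly the rel32 a
   near jmp/jcc expects. */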

#ifdef VBOX
/* emits stack alignment checks for strict builds. */
DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s)
{
# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */
    tcg_out8(s, 0xf7); tcg_out8(s, 0xc4);       /* test %esp, 1fh */
    tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1);
    tcg_out8(s, 0x74);                          /* jz imm8 */
    tcg_out8(s, 1);                             /* $+3 (over int3) */
    tcg_out8(s, 0xcc);                          /* int3 */
# else
    NOREF(s);
# endif
}
#endif /* VBOX */

/* maximum number of registers used for input function arguments */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    flags &= TCG_CALL_TYPE_MASK;
    switch(flags) {
    case TCG_CALL_TYPE_STD:
        return 0;
    case TCG_CALL_TYPE_REGPARM_1:
    case TCG_CALL_TYPE_REGPARM_2:
    case TCG_CALL_TYPE_REGPARM:
        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
    default:
        tcg_abort();
    }
}
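
/* Worked example, added for exposition: on a 32-bit host the regparm
   convention passes up to three arguments in EAX, EDX, ECX (see
   tcg_target_call_iarg_regs above).  For TCG_CALL_TYPE_REGPARM_2 the
   arithmetic above yields
       TCG_CALL_TYPE_REGPARM_2 - TCG_CALL_TYPE_REGPARM_1 + 1 = 2,
   i.e. two register arguments and the rest on the stack; this relies
   on the three REGPARM constants being consecutive.  On 64-bit hosts
   the SysV AMD64 ABI always supplies six integer argument registers. */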

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}
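
/* Worked example, added for exposition: these letters are used by the
   x86_op_defs[] table at the end of this file.  For instance
       { INDEX_op_st8_i32, { "q", "r" } }
   constrains the value operand with 'q' because an 8-bit store needs
   a byte-addressable register: on a 32-bit host the 0xf mask limits
   the allocator to EAX/ECX/EDX/EBX, while on a 64-bit host all 16
   registers qualify (via the REX machinery below). */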

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100   /* 0x0f opcode prefix */
#define P_DATA16        0x200   /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x400   /* 0x67 opcode prefix */
# define P_REXW         0x800   /* Set REX.W = 1 */
# define P_REXB_R       0x1000  /* REG field as byte register */
# define P_REXB_RM      0x2000  /* R/M field as byte register */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
#endif

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM)  /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev    0
#define EXT5_DEC_Ev    1
#define EXT5_CALLN_Ev  2
#define EXT5_JMPN_Ev   4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if defined(VBOX)
/* Calc the size of the tcg_out_opc() result. */
static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x)
{
    unsigned char len = 1;
# if TCG_TARGET_REG_BITS == 64
    unsigned rex;
    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
    if (rex) len++;
    if (opc & P_ADDR32) len++;
# endif
    if (opc & P_DATA16) len++;
    if (opc & P_EXT)    len++;

    return len;
}
#endif

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif
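
/* Worked examples, added for exposition (bytes derived by hand from
   the rules above):
     tcg_out_modrm(s, OPC_MOVL_GvEv | P_REXW, TCG_REG_RAX, TCG_REG_R8)
   needs REX.W (P_REXW >> 8) and REX.B (rm bit 3), giving 49 8b c0,
   i.e. "mov %r8, %rax".  And
     tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, TCG_REG_EAX, TCG_REG_ESI)
   must emit an otherwise-empty REX (0x40) so that r/m = 6 selects
   %sil rather than %dh: 40 0f b6 c6, i.e. "movzbl %sil, %eax". */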

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
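
/* Worked example, added for exposition: mod = 3 (0xc0) is the
   register-direct form, so
     tcg_out_modrm(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_EBX)
   emits 8b c3, "mov %ebx, %eax": opcode 0x8b, then
   0xc0 | (0 << 3) | 3 = 0xc3. */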

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM or INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
#ifdef VBOX
            tcg_target_long pc = (tcg_target_long)s->code_ptr + tcg_calc_opc_len(s, opc, r, 0, 0) + 5;
#else
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
#endif
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
#ifdef VBOX
                Assert(pc == (tcg_target_long)s->code_ptr);
#endif
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
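
/* Worked example, added for exposition: a stack-relative load such as
     tcg_out_modrm_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_ESP, 8)
   cannot use the single-byte form (r/m = 4 is the SIB escape), so it
   is encoded as 8b 44 24 08, "mov 0x8(%esp), %eax": mod = 0x40 for
   the disp8, SIB = 0x24 meaning "no index, base = %esp".  In the
   rip-relative path above, the displacement must be relative to the
   end of the instruction, so PC is the predicted end-of-instruction
   address; the VBOX variant derives the prefix/opcode length
   explicitly with tcg_calc_opc_len() and asserts the prediction once
   the bytes are out. */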

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
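
/* Worked example, added for exposition only (never compiled): the four
   cases above pick the shortest encoding; the byte sequences in the
   comments were worked out by hand from the Intel encodings. */
#if 0
static void tcg_movi_examples(TCGContext *s)
{
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RAX, 0);              /* 33 c0: xor %eax,%eax */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RAX, 0x12345678);     /* b8 78 56 34 12 (zero-extends) */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RAX, -1);             /* 48 c7 c0 ff ff ff ff (sign-extended imm32) */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RAX, 0x123456789aLL); /* 48 b8 + imm64 (movabs) */
}
#endif
/* The odd-looking "arg >> 31 >> 1" above is simply arg >> 32, split in
   two so the shift count stays legal when tcg_target_long is 32 bits
   and the (dead) branch is still compiled on a 32-bit host. */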

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
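
/* Worked example, added for exposition only (never compiled): */
#if 0
static void tgen_arithi_examples(TCGContext *s)
{
    /* Fits in a sign-extended imm8: 48 83 c4 08, "add $0x8, %rsp". */
    tgen_arithi(s, ARITH_ADD + P_REXW, TCG_REG_RSP, 8, 0);
    /* val == 1 takes the INC path: 48 ff c0, "inc %rax" (the one-byte
       0x40..0x4f INC/DEC encodings are REX prefixes in 64-bit mode,
       hence the MODRM form). */
    tgen_arithi(s, ARITH_ADD + P_REXW, TCG_REG_RAX, 1, 0);
    /* cf != 0 suppresses INC/DEC because add2/sub2 need the carry:
       48 83 c0 01, "add $0x1, %rax". */
    tgen_arithi(s, ARITH_ADD + P_REXW, TCG_REG_RAX, 1, 1);
}
#endif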

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

#ifdef VBOX
static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0);
    }
}
#endif

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
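
/* Worked example, added for exposition: val is measured from the start
   of the branch instruction, so val - 2 is the rel8 of the 2-byte
   short form, val - 5 the rel32 of "jmp rel32" (1 + 4 bytes), and
   val - 6 the rel32 of the two-byte "0f 8x" jcc.  A backward jump to a
   label 0x30 bytes before the current output position has val = -0x30,
   fits as rel8 -0x32, and is emitted as eb ce.  Forward branches are
   left as relocs with addends -1/-4, matching patch_reloc() above. */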

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
#ifdef VBOX
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr
                         - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
#else
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
#endif

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
#ifdef VBOX
    tcg_gen_stack_alignment_check(s);
#endif
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
#endif
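
/* Worked example, added for exposition; the concrete constants are
   assumptions (TARGET_PAGE_BITS = 12, CPU_TLB_ENTRY_BITS = 4,
   CPU_TLB_SIZE = 256), not values taken from this tree.  For a 4-byte
   load (s_bits = 2) at guest address 0x00403a64:
     r1 = (addr >> 8) & 0xff0 = 0x030, the byte offset of TLB entry
          number (addr >> 12) & 0xff = 3 within tlb_table[mem_index];
     r0 = addr & (0xfffff000 | 3) = 0x00403000 when the access is
          aligned.
   The tag stored in the TLB entry is page aligned, so folding the low
   (1 << s_bits) - 1 address bits into r0 makes any misaligned access
   compare unequal and fall into the slow path as well. */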

static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)

static void * const vbox_ld_helpers[] = {
    __ldub_vbox_phys,
    __lduw_vbox_phys,
    __ldul_vbox_phys,
    __ldq_vbox_phys,
    __ldb_vbox_phys,
    __ldw_vbox_phys,
    __ldl_vbox_phys,
    __ldq_vbox_phys,
};

static void * const vbox_st_helpers[] = {
    __stb_vbox_phys,
    __stw_vbox_phys,
    __stl_vbox_phys,
    __stq_vbox_phys
};

DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst)
{
    intptr_t disp;
# ifdef VBOX
    tcg_gen_stack_alignment_check(s);
# endif
    disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5;
    tcg_out8(s, 0xe8);      /* call disp32 */
    tcg_out32(s, disp);     /* disp32 */
}

static void tcg_out_vbox_phys_read(TCGContext *s, int index,
                                   int addr_reg,
                                   int data_reg, int data_reg2)
{
    int useReg2 = ((index & 3) == 3);

    /** @todo should we make phys address accessors fastcalls - probably not a big deal */
    /* out parameter (address), note that phys address is always 64-bit */
    AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));

# if 0
    tcg_out8(s, 0x6a); tcg_out8(s, 0x00);   /* push $0 */
    tcg_out_push(s, addr_reg);
# else
    /* mov addr_reg, %eax */
    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EAX, addr_reg);
# endif

    tcg_out_long_call(s, vbox_ld_helpers[index]);

    /* mov %eax, data_reg */
    tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);

    /* returned 64-bit value */
    if (useReg2)
        tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
}

static void tcg_out_vbox_phys_write(TCGContext *s, int index,
                                    int addr_reg,
                                    int val_reg, int val_reg2) {
    int useReg2 = ((index & 3) == 3);

# if 0
    /* out parameter (value2) */
    if (useReg2)
        tcg_out_push(s, val_reg2);
    /* out parameter (value) */
    tcg_out_push(s, val_reg);
    /* out parameter (address), note that phys address is always 64-bit */
    AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
    tcg_out8(s, 0x6a); tcg_out8(s, 0x00);   /* push $0 */
    tcg_out_push(s, addr_reg);
# else
    Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX)));
    /* mov addr_reg, %eax */
    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EAX, addr_reg);
    Assert(!useReg2 || (val_reg2 != TCG_REG_EDX));
    /* mov val_reg, %edx */
    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, val_reg);
    if (useReg2)
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, val_reg2);

# endif
    /* call it */
    tcg_out_long_call(s, vbox_st_helpers[index]);

    /* clean stack after us */
# if 0
    tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4));
# endif
}
1292
1293#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */
1294
1295/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1296 EAX. It will be useful once fixed registers globals are less
1297 common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code to the end of the TB */
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
#else  /* VBOX */
# error "broken"
    tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2);
#endif
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)

    /* XXX: move that code to the end of the TB */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }

#else  /* VBOX && REM_PHYS_ADDR_IN_TLB */
# error Borked
    tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2);
#endif /* VBOX && REM_PHYS_ADDR_IN_TLB */

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    default:
        tcg_abort();
    }

#undef OP_32_64
}
1926
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    /* A byte store needs a byte-addressable data register; "cb" restricts
       it to %ecx/%ebx, keeping clear of the slow-path call registers.  */
    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    /* TCG_REG_R14, */ /* Currently used for the global env. */
    TCG_REG_R15,
#else
# ifndef VBOX
    /* TCG_REG_EBP, */ /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
# else
    TCG_REG_EBP,
    TCG_REG_EBX,
    /* TCG_REG_ESI, */ /* Currently used for the global env. */
    TCG_REG_EDI,
# endif
#endif
};

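/* Stack frame built by the prologue below, from high to low addresses
   (a sketch; the exact register set comes from tcg_target_callee_save_regs):

       return address                    pushed by our caller
       callee-saved registers            pushed one by one below
       TCG_STATIC_CALL_ARGS_SIZE bytes   scratch space for helper calls

   The total is rounded up to TCG_TARGET_STACK_ALIGN; stack_addend is the
   part the explicit stack adjustment must supply on top of the pushes. */
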
2093/* Generate global QEMU prologue and epilogue code */
2094static void tcg_target_qemu_prologue(TCGContext *s)
2095{
2096 int i, frame_size, push_size, stack_addend;
2097
2098 /* TB prologue */
2099
2100 /* Save all callee saved registers. */
2101 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2102 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2103 }
2104# if defined(VBOX_STRICT) && defined(RT_ARCH_X86)
2105 tcg_out8(s, 0x31); /* xor ebp, ebp */
2106 tcg_out8(s, 0xed);
2107# endif
2108
2109 /* Reserve some stack space. */
2110 push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
2111 push_size *= TCG_TARGET_REG_BITS / 8;
2112
2113 frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
2114 frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
2115 ~(TCG_TARGET_STACK_ALIGN - 1);
2116 stack_addend = frame_size - push_size;
2117 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2118
2119 /* jmp *tb. */
2120 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
2121# ifdef VBOX
2122 tcg_gen_stack_alignment_check(s);
2123# endif
2124
2125 tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */
2126
2127 /* TB epilogue */
2128 tb_ret_addr = s->code_ptr;
2129
2130 tcg_out_addi(s, TCG_REG_ESP, stack_addend);
2131
2132 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2133 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2134 }
2135 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2136}

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* Fail safe: the qemu_ld/st fast path indexes TLB entries by shifting
       with CPU_TLB_ENTRY_BITS, so the entry size must match exactly. */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
        tcg_abort();
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);

    tcg_add_target_add_op_defs(x86_op_defs);
}