VirtualBox

source: vbox/trunk/src/recompiler/tcg/i386/tcg-target.c@ 62193

Last change on this file since 62193 was 42602, checked in by vboxsync, 12 years ago

mac build fix.

  • Property svn:eol-style set to native
File size: 67.5 KB
Line 
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#ifndef NDEBUG
26static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
27#if TCG_TARGET_REG_BITS == 64
28 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
30#else
31 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
32#endif
33};
34#endif
35
36static const int tcg_target_reg_alloc_order[] = {
37#if TCG_TARGET_REG_BITS == 64
38 TCG_REG_RBP,
39 TCG_REG_RBX,
40 TCG_REG_R12,
41 TCG_REG_R13,
42 TCG_REG_R14,
43 TCG_REG_R15,
44 TCG_REG_R10,
45 TCG_REG_R11,
46# if !defined(VBOX) || !defined(__MINGW64__)
47 TCG_REG_R9,
48 TCG_REG_R8,
49 TCG_REG_RCX,
50 TCG_REG_RDX,
51# endif
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54# if defined(VBOX) && defined(__MINGW64__)
55 TCG_REG_R9,
56 TCG_REG_R8,
57 TCG_REG_RDX,
58 TCG_REG_RCX,
59# endif
60 TCG_REG_RAX,
61#else
62 TCG_REG_EBX,
63 TCG_REG_ESI,
64 TCG_REG_EDI,
65 TCG_REG_EBP,
66 TCG_REG_ECX,
67 TCG_REG_EDX,
68 TCG_REG_EAX,
69#endif
70};
71
72static const int tcg_target_call_iarg_regs[] = {
73#if TCG_TARGET_REG_BITS == 64
74# if defined(VBOX) && defined(__MINGW64__)
75 TCG_REG_RCX,
76 TCG_REG_RDX,
77# else
78 TCG_REG_RDI,
79 TCG_REG_RSI,
80 TCG_REG_RDX,
81 TCG_REG_RCX,
82# endif
83 TCG_REG_R8,
84 TCG_REG_R9,
85#else
86 TCG_REG_EAX,
87 TCG_REG_EDX,
88 TCG_REG_ECX
89#endif
90};
91
92static const int tcg_target_call_oarg_regs[2] = {
93 TCG_REG_EAX,
94 TCG_REG_EDX
95};
96
97static uint8_t *tb_ret_addr;
98
99static void patch_reloc(uint8_t *code_ptr, int type,
100 tcg_target_long value, tcg_target_long addend)
101{
102 value += addend;
103 switch(type) {
104 case R_386_PC32:
105 value -= (uintptr_t)code_ptr;
106 if (value != (int32_t)value) {
107 tcg_abort();
108 }
109 *(uint32_t *)code_ptr = value;
110 break;
111 case R_386_PC8:
112 value -= (uintptr_t)code_ptr;
113 if (value != (int8_t)value) {
114 tcg_abort();
115 }
116 *(uint8_t *)code_ptr = value;
117 break;
118 default:
119 tcg_abort();
120 }
121}
122
#ifdef VBOX
/* Emit a run-time stack alignment check (strict Darwin builds only; other
   builds emit nothing).  The generated code tests the low bits of %esp
   against TCG_TARGET_STACK_ALIGN - 1 and hits an int3 when any are set. */
DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s)
{
# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */
    /* test %esp, imm32 -- imm32 is the alignment mask */
    tcg_out8(s, 0xf7);
    tcg_out8(s, 0xc4);
    tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1);

    /* jz rel8 with a displacement of 1: skip the one-byte int3 below */
    tcg_out8(s, 0x74);
    tcg_out8(s, 1);

    /* int3 -- reached only when the stack is misaligned */
    tcg_out8(s, 0xcc);
# else
    NOREF(s);
# endif
}
#endif /* VBOX */
138
139/* maximum number of register used for input function arguments */
140static inline int tcg_target_get_call_iarg_regs_count(int flags)
141{
142 if (TCG_TARGET_REG_BITS == 64) {
143 return 6;
144 }
145
146 flags &= TCG_CALL_TYPE_MASK;
147 switch(flags) {
148 case TCG_CALL_TYPE_STD:
149 return 0;
150 case TCG_CALL_TYPE_REGPARM_1:
151 case TCG_CALL_TYPE_REGPARM_2:
152 case TCG_CALL_TYPE_REGPARM:
153 return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
154 default:
155 tcg_abort();
156 }
157}
158
159/* parse target specific constraints */
160static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
161{
162 const char *ct_str;
163
164 ct_str = *pct_str;
165 switch(ct_str[0]) {
166 case 'a':
167 ct->ct |= TCG_CT_REG;
168 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
169 break;
170 case 'b':
171 ct->ct |= TCG_CT_REG;
172 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
173 break;
174 case 'c':
175 ct->ct |= TCG_CT_REG;
176 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
177 break;
178 case 'd':
179 ct->ct |= TCG_CT_REG;
180 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
181 break;
182 case 'S':
183 ct->ct |= TCG_CT_REG;
184 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
185 break;
186 case 'D':
187 ct->ct |= TCG_CT_REG;
188 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
189 break;
190 case 'q':
191 ct->ct |= TCG_CT_REG;
192 if (TCG_TARGET_REG_BITS == 64) {
193 tcg_regset_set32(ct->u.regs, 0, 0xffff);
194 } else {
195 tcg_regset_set32(ct->u.regs, 0, 0xf);
196 }
197 break;
198 case 'r':
199 ct->ct |= TCG_CT_REG;
200 if (TCG_TARGET_REG_BITS == 64) {
201 tcg_regset_set32(ct->u.regs, 0, 0xffff);
202 } else {
203 tcg_regset_set32(ct->u.regs, 0, 0xff);
204 }
205 break;
206
207 /* qemu_ld/st address constraint */
208 case 'L':
209 ct->ct |= TCG_CT_REG;
210 if (TCG_TARGET_REG_BITS == 64) {
211 tcg_regset_set32(ct->u.regs, 0, 0xffff);
212#if defined(VBOX) && defined(__MINGW64__)
213 tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
214 tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
215 tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
216#else
217 /** @todo figure why RDX isn't mentioned here. */
218 tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
219 tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
220#endif
221 } else {
222 tcg_regset_set32(ct->u.regs, 0, 0xff);
223 tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
224 tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
225 }
226 break;
227
228 case 'e':
229 ct->ct |= TCG_CT_CONST_S32;
230 break;
231 case 'Z':
232 ct->ct |= TCG_CT_CONST_U32;
233 break;
234
235 default:
236 return -1;
237 }
238 ct_str++;
239 *pct_str = ct_str;
240 return 0;
241}
242
243/* test if a constant matches the constraint */
244static inline int tcg_target_const_match(tcg_target_long val,
245 const TCGArgConstraint *arg_ct)
246{
247 int ct = arg_ct->ct;
248 if (ct & TCG_CT_CONST) {
249 return 1;
250 }
251 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
252 return 1;
253 }
254 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
255 return 1;
256 }
257 return 0;
258}
259
/* Low three bits of a register number, as stored in ModRM/SIB/opcode
   fields.  On 32-bit hosts the number is used unchanged. */
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

/* Flags carried above the opcode byte; tcg_out_opc() turns them into
   prefix bytes.  The 64-bit-only ones collapse to 0 on 32-bit hosts. */
#define P_EXT           (1 << 8)        /* 0x0f opcode prefix */
#define P_DATA16        (1 << 9)        /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       (1 << 10)       /* 0x67 opcode prefix */
# define P_REXW         (1 << 11)       /* Set REX.W = 1 */
# define P_REXB_R       (1 << 12)       /* REG field as byte register */
# define P_REXB_RM      (1 << 13)       /* R/M field as byte register */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
#endif

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD       0
#define ARITH_OR        1
#define ARITH_ADC       2
#define ARITH_SBB       3
#define ARITH_AND       4
#define ARITH_SUB       5
#define ARITH_XOR       6
#define ARITH_CMP       7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL       0
#define SHIFT_ROR       1
#define SHIFT_SHL       4
#define SHIFT_SHR       5
#define SHIFT_SAR       7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3_Ev. */
#define EXT3_NOT        2
#define EXT3_NEG        3
#define EXT3_MUL        4
#define EXT3_IMUL       5
#define EXT3_DIV        6
#define EXT3_IDIV       7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5. */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Opcode bytes, optionally combined with the P_* flags above. */
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Condition codes to be added to OPC_JCC_{long,short}.  JCC_JMP is not a
   condition: it requests an unconditional jump. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf
372
373static const uint8_t tcg_cond_to_jcc[10] = {
374 [TCG_COND_EQ] = JCC_JE,
375 [TCG_COND_NE] = JCC_JNE,
376 [TCG_COND_LT] = JCC_JL,
377 [TCG_COND_GE] = JCC_JGE,
378 [TCG_COND_LE] = JCC_JLE,
379 [TCG_COND_GT] = JCC_JG,
380 [TCG_COND_LTU] = JCC_JB,
381 [TCG_COND_GEU] = JCC_JAE,
382 [TCG_COND_LEU] = JCC_JBE,
383 [TCG_COND_GTU] = JCC_JA,
384};
385
#if defined(VBOX)
/* Number of bytes tcg_out_opc() would emit for the same arguments:
   the opcode byte plus one byte per prefix (0x66, 0x67, REX, 0x0f).
   Nothing is emitted and S is not touched. */
static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x)
{
    unsigned char len = 1;      /* the opcode byte itself */
# if TCG_TARGET_REG_BITS == 64
    unsigned rex = ((opc & P_REXW) >> 8)    /* REX.W */
                 | ((r & 8) >> 1)           /* REX.R */
                 | ((x & 8) >> 2)           /* REX.X */
                 | ((rm & 8) >> 3);         /* REX.B */

    /* Byte-register operands from register 4 upwards force a REX prefix. */
    if (r >= 4) {
        rex |= opc & P_REXB_R;
    }
    if (rm >= 4) {
        rex |= opc & P_REXB_RM;
    }

    if (rex) {
        len++;
    }
    if (opc & P_ADDR32) {
        len++;
    }
# endif
    if (opc & P_DATA16) {
        len++;
    }
    if (opc & P_EXT) {
        len++;
    }

    return len;
}
#endif
409
410#if TCG_TARGET_REG_BITS == 64
411static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
412{
413 int rex;
414
415 if (opc & P_DATA16) {
416 /* We should never be asking for both 16 and 64-bit operation. */
417 assert((opc & P_REXW) == 0);
418 tcg_out8(s, 0x66);
419 }
420 if (opc & P_ADDR32) {
421 tcg_out8(s, 0x67);
422 }
423
424 rex = 0;
425 rex |= (opc & P_REXW) >> 8; /* REX.W */
426 rex |= (r & 8) >> 1; /* REX.R */
427 rex |= (x & 8) >> 2; /* REX.X */
428 rex |= (rm & 8) >> 3; /* REX.B */
429
430 /* P_REXB_{R,RM} indicates that the given register is the low byte.
431 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
432 as otherwise the encoding indicates %[abcd]h. Note that the values
433 that are ORed in merely indicate that the REX byte must be present;
434 those bits get discarded in output. */
435 rex |= opc & (r >= 4 ? P_REXB_R : 0);
436 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
437
438 if (rex) {
439 tcg_out8(s, (uint8_t)(rex | 0x40));
440 }
441
442 if (opc & P_EXT) {
443 tcg_out8(s, 0x0f);
444 }
445 tcg_out8(s, opc);
446}
447#else
448static void tcg_out_opc(TCGContext *s, int opc)
449{
450 if (opc & P_DATA16) {
451 tcg_out8(s, 0x66);
452 }
453 if (opc & P_EXT) {
454 tcg_out8(s, 0x0f);
455 }
456 tcg_out8(s, opc);
457}
458/* Discard the register arguments to tcg_out_opc early, so as not to penalize
459 the 32-bit compilation paths. This method works with all versions of gcc,
460 whereas relying on optimization may not be able to exclude them. */
461#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
462#endif
463
464static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
465{
466 tcg_out_opc(s, opc, r, rm, 0);
467 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
468}
469
470/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
471 We handle either RM and INDEX missing with a negative value. In 64-bit
472 mode for absolute addresses, ~RM is the size of the immediate operand
473 that will follow the instruction. */
474
475static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
476 int index, int shift,
477 tcg_target_long offset)
478{
479 int mod, len;
480
481 if (index < 0 && rm < 0) {
482 if (TCG_TARGET_REG_BITS == 64) {
483 /* Try for a rip-relative addressing mode. This has replaced
484 the 32-bit-mode absolute addressing encoding. */
485#ifdef VBOX
486 tcg_target_long pc = (tcg_target_long)s->code_ptr
487 + tcg_calc_opc_len(s, opc, r, 0, 0) + 1 + 4;
488#else
489 tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
490#endif
491 tcg_target_long disp = offset - pc;
492 if (disp == (int32_t)disp) {
493 tcg_out_opc(s, opc, r, 0, 0);
494 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
495 tcg_out32(s, disp);
496#ifdef VBOX
497 Assert(pc == (tcg_target_long)s->code_ptr);
498#endif
499 return;
500 }
501
502 /* Try for an absolute address encoding. This requires the
503 use of the MODRM+SIB encoding and is therefore larger than
504 rip-relative addressing. */
505 if (offset == (int32_t)offset) {
506 tcg_out_opc(s, opc, r, 0, 0);
507 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
508 tcg_out8(s, (4 << 3) | 5);
509 tcg_out32(s, offset);
510 return;
511 }
512
513 /* ??? The memory isn't directly addressable. */
514 tcg_abort();
515 } else {
516 /* Absolute address. */
517 tcg_out_opc(s, opc, r, 0, 0);
518 tcg_out8(s, (r << 3) | 5);
519 tcg_out32(s, offset);
520 return;
521 }
522 }
523
524 /* Find the length of the immediate addend. Note that the encoding
525 that would be used for (%ebp) indicates absolute addressing. */
526 if (rm < 0) {
527 mod = 0, len = 4, rm = 5;
528 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
529 mod = 0, len = 0;
530 } else if (offset == (int8_t)offset) {
531 mod = 0x40, len = 1;
532 } else {
533 mod = 0x80, len = 4;
534 }
535
536 /* Use a single byte MODRM format if possible. Note that the encoding
537 that would be used for %esp is the escape to the two byte form. */
538 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
539 /* Single byte MODRM format. */
540 tcg_out_opc(s, opc, r, rm, 0);
541 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
542 } else {
543 /* Two byte MODRM+SIB format. */
544
545 /* Note that the encoding that would place %esp into the index
546 field indicates no index register. In 64-bit mode, the REX.X
547 bit counts, so %r12 can be used as the index. */
548 if (index < 0) {
549 index = 4;
550 } else {
551 assert(index != TCG_REG_ESP);
552 }
553
554 tcg_out_opc(s, opc, r, rm, index);
555 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
556 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
557 }
558
559 if (len == 1) {
560 tcg_out8(s, offset);
561 } else if (len == 4) {
562 tcg_out32(s, offset);
563 }
564}
565
566/* A simplification of the above with no index or shift. */
567static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
568 int rm, tcg_target_long offset)
569{
570 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
571}
572
573/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
574static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
575{
576 /* Propagate an opcode prefix, such as P_REXW. */
577 int ext = subop & ~0x7;
578 subop &= 0x7;
579
580 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
581}
582
583static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
584{
585 if (arg != ret) {
586 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
587 tcg_out_modrm(s, opc, ret, arg);
588 }
589}
590
591static void tcg_out_movi(TCGContext *s, TCGType type,
592 int ret, tcg_target_long arg)
593{
594 if (arg == 0) {
595 tgen_arithr(s, ARITH_XOR, ret, ret);
596 return;
597 } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
598 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
599 tcg_out32(s, arg);
600 } else if (arg == (int32_t)arg) {
601 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
602 tcg_out32(s, arg);
603 } else {
604 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
605 tcg_out32(s, arg);
606 tcg_out32(s, arg >> 31 >> 1);
607 }
608}
609
610static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
611{
612 if (val == (int8_t)val) {
613 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
614 tcg_out8(s, val);
615 } else if (val == (int32_t)val) {
616 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
617 tcg_out32(s, val);
618 } else {
619 tcg_abort();
620 }
621}
622
623static inline void tcg_out_push(TCGContext *s, int reg)
624{
625 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
626}
627
628static inline void tcg_out_pop(TCGContext *s, int reg)
629{
630 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
631}
632
633static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
634 int arg1, tcg_target_long arg2)
635{
636 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
637 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
638}
639
640static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
641 int arg1, tcg_target_long arg2)
642{
643 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
644 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
645}
646
647static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
648{
649 /* Propagate an opcode prefix, such as P_DATA16. */
650 int ext = subopc & ~0x7;
651 subopc &= 0x7;
652
653 if (count == 1) {
654 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
655 } else {
656 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
657 tcg_out8(s, count);
658 }
659}
660
661static inline void tcg_out_bswap32(TCGContext *s, int reg)
662{
663 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
664}
665
666static inline void tcg_out_rolw_8(TCGContext *s, int reg)
667{
668 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
669}
670
671static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
672{
673 /* movzbl */
674 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
675 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
676}
677
678static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
679{
680 /* movsbl */
681 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
682 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
683}
684
685static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
686{
687 /* movzwl */
688 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
689}
690
691static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
692{
693 /* movsw[lq] */
694 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
695}
696
697static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
698{
699 /* 32-bit mov zero extends. */
700 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
701}
702
703static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
704{
705 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
706}
707
708static inline void tcg_out_bswap64(TCGContext *s, int reg)
709{
710 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
711}
712
713static void tgen_arithi(TCGContext *s, int c, int r0,
714 tcg_target_long val, int cf)
715{
716 int rexw = 0;
717
718 if (TCG_TARGET_REG_BITS == 64) {
719 rexw = c & -8;
720 c &= 7;
721 }
722
723 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
724 partial flags update stalls on Pentium4 and are not recommended
725 by current Intel optimization manuals. */
726 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
727 int is_inc = (c == ARITH_ADD) ^ (val < 0);
728 if (TCG_TARGET_REG_BITS == 64) {
729 /* The single-byte increment encodings are re-tasked as the
730 REX prefixes. Use the MODRM encoding. */
731 tcg_out_modrm(s, OPC_GRP5 + rexw,
732 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
733 } else {
734 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
735 }
736 return;
737 }
738
739 if (c == ARITH_AND) {
740 if (TCG_TARGET_REG_BITS == 64) {
741 if (val == 0xffffffffu) {
742 tcg_out_ext32u(s, r0, r0);
743 return;
744 }
745 if (val == (uint32_t)val) {
746 /* AND with no high bits set can use a 32-bit operation. */
747 rexw = 0;
748 }
749 }
750 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
751 tcg_out_ext8u(s, r0, r0);
752 return;
753 }
754 if (val == 0xffffu) {
755 tcg_out_ext16u(s, r0, r0);
756 return;
757 }
758 }
759
760 if (val == (int8_t)val) {
761 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
762 tcg_out8(s, val);
763 return;
764 }
765 if (rexw == 0 || val == (int32_t)val) {
766 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
767 tcg_out32(s, val);
768 return;
769 }
770
771 tcg_abort();
772}
773
774static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
775{
776 if (val != 0) {
777 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
778 }
779}
780
#if defined(VBOX) && defined(RT_OS_DARWIN) && ARCH_BITS == 32
# define VBOX_16_BYTE_STACK_ALIGN
#endif
#ifdef VBOX_16_BYTE_STACK_ALIGN
/* reg -= val (with P_REXW); nothing is emitted for val == 0.  Only built
   when VBOX_16_BYTE_STACK_ALIGN is defined. */
static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val == 0) {
        return;
    }
    tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0);
}
#endif
792
793/* Use SMALL != 0 to force a short forward branch. */
794static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
795{
796 int32_t val, val1;
797 TCGLabel *l = &s->labels[label_index];
798
799 if (l->has_value) {
800 val = l->u.value - (tcg_target_long)s->code_ptr;
801 val1 = val - 2;
802 if ((int8_t)val1 == val1) {
803 if (opc == -1) {
804 tcg_out8(s, OPC_JMP_short);
805 } else {
806 tcg_out8(s, OPC_JCC_short + opc);
807 }
808 tcg_out8(s, val1);
809 } else {
810 if (small) {
811 tcg_abort();
812 }
813 if (opc == -1) {
814 tcg_out8(s, OPC_JMP_long);
815 tcg_out32(s, val - 5);
816 } else {
817 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
818 tcg_out32(s, val - 6);
819 }
820 }
821 } else if (small) {
822 if (opc == -1) {
823 tcg_out8(s, OPC_JMP_short);
824 } else {
825 tcg_out8(s, OPC_JCC_short + opc);
826 }
827 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
828 s->code_ptr += 1;
829 } else {
830 if (opc == -1) {
831 tcg_out8(s, OPC_JMP_long);
832 } else {
833 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
834 }
835 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
836 s->code_ptr += 4;
837 }
838}
839
840static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
841 int const_arg2, int rexw)
842{
843 if (const_arg2) {
844 if (arg2 == 0) {
845 /* test r, r */
846 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
847 } else {
848 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
849 }
850 } else {
851 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
852 }
853}
854
855static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
856 TCGArg arg1, TCGArg arg2, int const_arg2,
857 int label_index, int small)
858{
859 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
860 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
861}
862
863#if TCG_TARGET_REG_BITS == 64
864static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
865 TCGArg arg1, TCGArg arg2, int const_arg2,
866 int label_index, int small)
867{
868 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
869 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
870}
871#else
872/* XXX: we implement it at the target level to avoid having to
873 handle cross basic blocks temporaries */
874static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
875 const int *const_args, int small)
876{
877 int label_next;
878 label_next = gen_new_label();
879 switch(args[4]) {
880 case TCG_COND_EQ:
881 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
882 label_next, 1);
883 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
884 args[5], small);
885 break;
886 case TCG_COND_NE:
887 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
888 args[5], small);
889 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
890 args[5], small);
891 break;
892 case TCG_COND_LT:
893 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
894 args[5], small);
895 tcg_out_jxx(s, JCC_JNE, label_next, 1);
896 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
897 args[5], small);
898 break;
899 case TCG_COND_LE:
900 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
901 args[5], small);
902 tcg_out_jxx(s, JCC_JNE, label_next, 1);
903 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
904 args[5], small);
905 break;
906 case TCG_COND_GT:
907 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
908 args[5], small);
909 tcg_out_jxx(s, JCC_JNE, label_next, 1);
910 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
911 args[5], small);
912 break;
913 case TCG_COND_GE:
914 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
915 args[5], small);
916 tcg_out_jxx(s, JCC_JNE, label_next, 1);
917 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
918 args[5], small);
919 break;
920 case TCG_COND_LTU:
921 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
922 args[5], small);
923 tcg_out_jxx(s, JCC_JNE, label_next, 1);
924 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
925 args[5], small);
926 break;
927 case TCG_COND_LEU:
928 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
929 args[5], small);
930 tcg_out_jxx(s, JCC_JNE, label_next, 1);
931 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
932 args[5], small);
933 break;
934 case TCG_COND_GTU:
935 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
936 args[5], small);
937 tcg_out_jxx(s, JCC_JNE, label_next, 1);
938 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
939 args[5], small);
940 break;
941 case TCG_COND_GEU:
942 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
943 args[5], small);
944 tcg_out_jxx(s, JCC_JNE, label_next, 1);
945 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
946 args[5], small);
947 break;
948 default:
949 tcg_abort();
950 }
951 tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
952}
953#endif
954
955static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
956 TCGArg arg1, TCGArg arg2, int const_arg2)
957{
958 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
959 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
960 tcg_out_ext8u(s, dest, dest);
961}
962
963#if TCG_TARGET_REG_BITS == 64
964static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
965 TCGArg arg1, TCGArg arg2, int const_arg2)
966{
967 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
968 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
969 tcg_out_ext8u(s, dest, dest);
970}
971#else
972static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
973 const int *const_args)
974{
975 TCGArg new_args[6];
976 int label_true, label_over;
977
978 memcpy(new_args, args+1, 5*sizeof(TCGArg));
979
980 if (args[0] == args[1] || args[0] == args[2]
981 || (!const_args[3] && args[0] == args[3])
982 || (!const_args[4] && args[0] == args[4])) {
983 /* When the destination overlaps with one of the argument
984 registers, don't do anything tricky. */
985 label_true = gen_new_label();
986 label_over = gen_new_label();
987
988 new_args[5] = label_true;
989 tcg_out_brcond2(s, new_args, const_args+1, 1);
990
991 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
992 tcg_out_jxx(s, JCC_JMP, label_over, 1);
993 tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
994
995 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
996 tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
997 } else {
998 /* When the destination does not overlap one of the arguments,
999 clear the destination first, jump if cond false, and emit an
1000 increment in the true case. This results in smaller code. */
1001
1002 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1003
1004 label_over = gen_new_label();
1005 new_args[4] = tcg_invert_cond(new_args[4]);
1006 new_args[5] = label_over;
1007 tcg_out_brcond2(s, new_args, const_args+1, 1);
1008
1009 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
1010 tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
1011 }
1012}
1013#endif
1014
1015static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
1016{
1017#ifdef VBOX
1018 tcg_target_long disp = dest - (tcg_target_long)s->code_ptr
1019 - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0)
1020 - 4;
1021#else
1022 tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
1023#endif
1024
1025 if (disp == (int32_t)disp) {
1026 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1027 tcg_out32(s, disp);
1028 } else {
1029 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1030 tcg_out_modrm(s, OPC_GRP5,
1031 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1032 }
1033}
1034
1035static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
1036{
1037#ifdef VBOX
1038 tcg_gen_stack_alignment_check(s);
1039#endif
1040 tcg_out_branch(s, 1, dest);
1041}
1042
1043static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
1044{
1045 tcg_out_branch(s, 0, dest);
1046}
1047
1048#if defined(CONFIG_SOFTMMU)
1049
1050#include "../../softmmu_defs.h"
1051
1052static void *qemu_ld_helpers[4] = {
1053 __ldb_mmu,
1054 __ldw_mmu,
1055 __ldl_mmu,
1056 __ldq_mmu,
1057};
1058
1059static void *qemu_st_helpers[4] = {
1060 __stb_mmu,
1061 __stw_mmu,
1062 __stl_mmu,
1063 __stq_mmu,
1064};
1065
1066/* Perform the TLB load and compare.
1067
1068 Inputs:
1069 ADDRLO_IDX contains the index into ARGS of the low part of the
1070 address; the high part of the address is at ADDR_LOW_IDX+1.
1071
1072 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1073
1074 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1075 This should be offsetof addr_read or addr_write.
1076
1077 Outputs:
1078 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1079 positions of the displacements of forward jumps to the TLB miss case.
1080
1081 First argument register is loaded with the low part of the address.
1082 In the TLB hit case, it has been adjusted as indicated by the TLB
1083 and so is a host address. In the TLB miss case, it continues to
1084 hold a guest address.
1085
1086 Second argument register is clobbered. */
1087
1088static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
1089 int mem_index, int s_bits,
1090 const TCGArg *args,
1091 uint8_t **label_ptr, int which)
1092{
1093 const int addrlo = args[addrlo_idx];
1094 const int r0 = tcg_target_call_iarg_regs[0];
1095 const int r1 = tcg_target_call_iarg_regs[1];
1096 TCGType type = TCG_TYPE_I32;
1097 int rexw = 0;
1098
1099 if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
1100 type = TCG_TYPE_I64;
1101 rexw = P_REXW;
1102 }
1103
1104 tcg_out_mov(s, type, r1, addrlo);
1105 tcg_out_mov(s, type, r0, addrlo);
1106
1107 tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
1108 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1109
1110 tgen_arithi(s, ARITH_AND + rexw, r0,
1111 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1112 tgen_arithi(s, ARITH_AND + rexw, r1,
1113 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1114
1115 tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
1116 offsetof(CPUState, tlb_table[mem_index][0])
1117 + which);
1118
1119 /* cmp 0(r1), r0 */
1120 tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
1121
1122 tcg_out_mov(s, type, r0, addrlo);
1123
1124 /* jne label1 */
1125 tcg_out8(s, OPC_JCC_short + JCC_JNE);
1126 label_ptr[0] = s->code_ptr;
1127 s->code_ptr++;
1128
1129 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1130 /* cmp 4(r1), addrhi */
1131 tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
1132
1133 /* jne label1 */
1134 tcg_out8(s, OPC_JCC_short + JCC_JNE);
1135 label_ptr[1] = s->code_ptr;
1136 s->code_ptr++;
1137 }
1138
1139 /* TLB Hit. */
1140
1141 /* add addend(r1), r0 */
1142 tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
1143 offsetof(CPUTLBEntry, addend) - which);
1144}
1145#endif
1146
1147static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
1148 int base, tcg_target_long ofs, int sizeop)
1149{
1150#ifdef TARGET_WORDS_BIGENDIAN
1151 const int bswap = 1;
1152#else
1153 const int bswap = 0;
1154#endif
1155 switch (sizeop) {
1156 case 0:
1157 tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
1158 break;
1159 case 0 | 4:
1160 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
1161 break;
1162 case 1:
1163 tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
1164 if (bswap) {
1165 tcg_out_rolw_8(s, datalo);
1166 }
1167 break;
1168 case 1 | 4:
1169 if (bswap) {
1170 tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
1171 tcg_out_rolw_8(s, datalo);
1172 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1173 } else {
1174 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
1175 }
1176 break;
1177 case 2:
1178 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1179 if (bswap) {
1180 tcg_out_bswap32(s, datalo);
1181 }
1182 break;
1183#if TCG_TARGET_REG_BITS == 64
1184 case 2 | 4:
1185 if (bswap) {
1186 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1187 tcg_out_bswap32(s, datalo);
1188 tcg_out_ext32s(s, datalo, datalo);
1189 } else {
1190 tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
1191 }
1192 break;
1193#endif
1194 case 3:
1195 if (TCG_TARGET_REG_BITS == 64) {
1196 tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
1197 if (bswap) {
1198 tcg_out_bswap64(s, datalo);
1199 }
1200 } else {
1201 if (bswap) {
1202 int t = datalo;
1203 datalo = datahi;
1204 datahi = t;
1205 }
1206 if (base != datalo) {
1207 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1208 tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1209 } else {
1210 tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1211 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1212 }
1213 if (bswap) {
1214 tcg_out_bswap32(s, datalo);
1215 tcg_out_bswap32(s, datahi);
1216 }
1217 }
1218 break;
1219 default:
1220 tcg_abort();
1221 }
1222}
1223
1224#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
1225
1226static void * const vbox_ld_helpers[] = {
1227 __ldub_vbox_phys,
1228 __lduw_vbox_phys,
1229 __ldul_vbox_phys,
1230 __ldq_vbox_phys,
1231 __ldb_vbox_phys,
1232 __ldw_vbox_phys,
1233 __ldl_vbox_phys,
1234 __ldq_vbox_phys,
1235};
1236
1237static void * const vbox_st_helpers[] = {
1238 __stb_vbox_phys,
1239 __stw_vbox_phys,
1240 __stl_vbox_phys,
1241 __stq_vbox_phys
1242};
1243
1244DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst)
1245{
1246 intptr_t disp;
1247# ifdef VBOX
1248 tcg_gen_stack_alignment_check(s);
1249# endif
1250 disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5;
1251 tcg_out8(s, 0xe8); /* call disp32 */
1252 tcg_out32(s, disp); /* disp32 */
1253}
1254
1255static void tcg_out_vbox_phys_read(TCGContext *s, int index,
1256 int addr_reg,
1257 int data_reg, int data_reg2)
1258{
1259 int useReg2 = ((index & 3) == 3);
1260
1261 /** @todo: should we make phys address accessors fastcalls - probably not a big deal */
1262 /* out parameter (address), note that phys address is always 64-bit */
1263 AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
1264
1265# if 0
1266 tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */
1267 tcg_out_push(s, addr_reg);
1268# else
1269 /* mov addr_reg, %eax */
1270 tcg_out_mov(s, TCG_REG_EAX, addr_reg);
1271# endif
1272
1273 tcg_out_long_call(s, vbox_ld_helpers[index]);
1274
1275 /* mov %eax, data_reg */
1276 tcg_out_mov(s, data_reg, TCG_REG_EAX);
1277
1278 /* returned 64-bit value */
1279 if (useReg2)
1280 tcg_out_mov(s, data_reg2, TCG_REG_EDX);
1281}
1282
1283static void tcg_out_vbox_phys_write(TCGContext *s, int index,
1284 int addr_reg,
1285 int val_reg, int val_reg2) {
1286 int useReg2 = ((index & 3) == 3);
1287
1288# if 0
1289 /* out parameter (value2) */
1290 if (useReg2)
1291 tcg_out_push(s, val_reg2);
1292 /* out parameter (value) */
1293 tcg_out_push(s, val_reg);
1294 /* out parameter (address), note that phys address is always 64-bit */
1295 AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
1296 tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */
1297 tcg_out_push(s, addr_reg);
1298# else
1299 Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX)));
1300 /* mov addr_reg, %eax */
1301 tcg_out_mov(s, TCG_REG_EAX, addr_reg);
1302 Assert(!useReg2 || (val_reg2 != TCG_REG_EDX));
1303 /* mov val_reg, %edx */
1304 tcg_out_mov(s, TCG_REG_EDX, val_reg);
1305 if (useReg2)
1306 tcg_out_mov(s, TCG_REG_ECX, val_reg2);
1307
1308# endif
1309 /* call it */
1310 tcg_out_long_call(s, vbox_st_helpers[index]);
1311
1312 /* clean stack after us */
1313# if 0
1314 tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4));
1315# endif
1316}
1317
1318#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */
1319
1320/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1321 EAX. It will be useful once fixed registers globals are less
1322 common. */
1323static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
1324 int opc)
1325{
1326 int data_reg, data_reg2 = 0;
1327 int addrlo_idx;
1328#if defined(CONFIG_SOFTMMU)
1329 int mem_index, s_bits, arg_idx;
1330 uint8_t *label_ptr[3];
1331#endif
1332
1333 data_reg = args[0];
1334 addrlo_idx = 1;
1335 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1336 data_reg2 = args[1];
1337 addrlo_idx = 2;
1338 }
1339
1340#if defined(CONFIG_SOFTMMU)
1341 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1342 s_bits = opc & 3;
1343
1344 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1345 label_ptr, offsetof(CPUTLBEntry, addr_read));
1346
1347 /* TLB Hit. */
1348 tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
1349 tcg_target_call_iarg_regs[0], 0, opc);
1350
1351 /* jmp label2 */
1352 tcg_out8(s, OPC_JMP_short);
1353 label_ptr[2] = s->code_ptr;
1354 s->code_ptr++;
1355
1356 /* TLB Miss. */
1357
1358 /* label1: */
1359 *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1360 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1361 *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1362 }
1363
1364 /* XXX: move that code at the end of the TB */
1365 /* The first argument is already loaded with addrlo. */
1366 arg_idx = 1;
1367 if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
1368 tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
1369 args[addrlo_idx + 1]);
1370 }
1371 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
1372 mem_index);
1373 tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
1374
1375 switch(opc) {
1376 case 0 | 4:
1377 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1378 break;
1379 case 1 | 4:
1380 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1381 break;
1382 case 0:
1383 tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
1384 break;
1385 case 1:
1386 tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
1387 break;
1388 case 2:
1389 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1390 break;
1391#if TCG_TARGET_REG_BITS == 64
1392 case 2 | 4:
1393 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1394 break;
1395#endif
1396 case 3:
1397 if (TCG_TARGET_REG_BITS == 64) {
1398 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1399 } else if (data_reg == TCG_REG_EDX) {
1400 /* xchg %edx, %eax */
1401 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1402 tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
1403 } else {
1404 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1405 tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
1406 }
1407 break;
1408 default:
1409 tcg_abort();
1410 }
1411
1412 /* label2: */
1413 *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1414#else
1415# if defined(VBOX) && defined(__MINGW64__)
1416# error port me
1417# endif
1418 {
1419 int32_t offset = GUEST_BASE;
1420 int base = args[addrlo_idx];
1421
1422 if (TCG_TARGET_REG_BITS == 64) {
1423 /* ??? We assume all operations have left us with register
1424 contents that are zero extended. So far this appears to
1425 be true. If we want to enforce this, we can either do
1426 an explicit zero-extension here, or (if GUEST_BASE == 0)
1427 use the ADDR32 prefix. For now, do nothing. */
1428
1429 if (offset != GUEST_BASE) {
1430 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1431 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1432 base = TCG_REG_RDI, offset = 0;
1433 }
1434 }
1435
1436 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
1437 }
1438#endif
1439}
1440
1441static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
1442 int base, tcg_target_long ofs, int sizeop)
1443{
1444#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
1445#ifdef TARGET_WORDS_BIGENDIAN
1446 const int bswap = 1;
1447#else
1448 const int bswap = 0;
1449#endif
1450 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1451 we could perform the bswap twice to restore the original value
1452 instead of moving to the scratch. But as it is, the L constraint
1453 means that the second argument reg is definitely free here. */
1454 int scratch = tcg_target_call_iarg_regs[1];
1455
1456 switch (sizeop) {
1457 case 0:
1458 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
1459 break;
1460 case 1:
1461 if (bswap) {
1462 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1463 tcg_out_rolw_8(s, scratch);
1464 datalo = scratch;
1465 }
1466 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
1467 break;
1468 case 2:
1469 if (bswap) {
1470 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1471 tcg_out_bswap32(s, scratch);
1472 datalo = scratch;
1473 }
1474 tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
1475 break;
1476 case 3:
1477 if (TCG_TARGET_REG_BITS == 64) {
1478 if (bswap) {
1479 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1480 tcg_out_bswap64(s, scratch);
1481 datalo = scratch;
1482 }
1483 tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
1484 } else if (bswap) {
1485 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1486 tcg_out_bswap32(s, scratch);
1487 tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
1488 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1489 tcg_out_bswap32(s, scratch);
1490 tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
1491 } else {
1492 tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
1493 tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1494 }
1495 break;
1496 default:
1497 tcg_abort();
1498 }
1499#else /* VBOX */
1500# error "broken"
1501 tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2);
1502#endif
1503}
1504
1505static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
1506 int opc)
1507{
1508 int data_reg, data_reg2 = 0;
1509 int addrlo_idx;
1510#if defined(CONFIG_SOFTMMU)
1511 int mem_index, s_bits;
1512 int stack_adjust;
1513 uint8_t *label_ptr[3];
1514#endif
1515
1516 data_reg = args[0];
1517 addrlo_idx = 1;
1518 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1519 data_reg2 = args[1];
1520 addrlo_idx = 2;
1521 }
1522
1523#if defined(CONFIG_SOFTMMU)
1524 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1525 s_bits = opc;
1526
1527 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1528 label_ptr, offsetof(CPUTLBEntry, addr_write));
1529
1530 /* TLB Hit. */
1531 tcg_out_qemu_st_direct(s, data_reg, data_reg2,
1532 tcg_target_call_iarg_regs[0], 0, opc);
1533
1534 /* jmp label2 */
1535 tcg_out8(s, OPC_JMP_short);
1536 label_ptr[2] = s->code_ptr;
1537 s->code_ptr++;
1538
1539 /* TLB Miss. */
1540
1541 /* label1: */
1542 *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1543 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1544 *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1545 }
1546
1547# if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
1548
1549 /* XXX: move that code at the end of the TB */
1550 if (TCG_TARGET_REG_BITS == 64) {
1551# if defined(VBOX) && defined(__MINGW64__)
1552 tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1553 tcg_target_call_iarg_regs[1], data_reg);
1554 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
1555# else
1556 tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1557 TCG_REG_RSI, data_reg);
1558 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
1559# endif
1560 stack_adjust = 0;
1561 } else if (TARGET_LONG_BITS == 32) {
1562 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
1563 if (opc == 3) {
1564 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
1565# ifdef VBOX_16_BYTE_STACK_ALIGN
1566 tcg_out_subi(s, TCG_REG_ESP, 12);
1567# endif
1568 tcg_out_pushi(s, mem_index);
1569 stack_adjust = 4;
1570 } else {
1571 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
1572 stack_adjust = 0;
1573 }
1574 } else {
1575 if (opc == 3) {
1576 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
1577# ifdef VBOX_16_BYTE_STACK_ALIGN
1578 tcg_out_pushi(s, 0);
1579# endif
1580 tcg_out_pushi(s, mem_index);
1581 tcg_out_push(s, data_reg2);
1582 tcg_out_push(s, data_reg);
1583 stack_adjust = 12;
1584 } else {
1585 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
1586 switch(opc) {
1587 case 0:
1588 tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
1589 break;
1590 case 1:
1591 tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
1592 break;
1593 case 2:
1594 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
1595 break;
1596 }
1597# ifdef VBOX_16_BYTE_STACK_ALIGN
1598 tcg_out_subi(s, TCG_REG_ESP, 12);
1599# endif
1600 tcg_out_pushi(s, mem_index);
1601 stack_adjust = 4;
1602 }
1603 }
1604
1605 tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
1606
1607# ifdef VBOX_16_BYTE_STACK_ALIGN
1608 if (stack_adjust != 0) {
1609 tcg_out_addi(s, TCG_REG_ESP, RT_ALIGN(stack_adjust, 16));
1610 }
1611# else
1612 if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
1613 /* Pop and discard. This is 2 bytes smaller than the add. */
1614 tcg_out_pop(s, TCG_REG_ECX);
1615 } else if (stack_adjust != 0) {
1616 tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
1617 }
1618# endif
1619
1620# else /* VBOX && REM_PHYS_ADDR_IN_TLB */
1621# error Borked
1622 tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2);
1623# endif /* VBOX && REM_PHYS_ADDR_IN_TLB */
1624
1625 /* label2: */
1626 *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1627#else
1628# if defined(VBOX) && defined(__MINGW64__)
1629# error port me
1630# endif
1631 {
1632 int32_t offset = GUEST_BASE;
1633 int base = args[addrlo_idx];
1634
1635 if (TCG_TARGET_REG_BITS == 64) {
1636 /* ??? We assume all operations have left us with register
1637 contents that are zero extended. So far this appears to
1638 be true. If we want to enforce this, we can either do
1639 an explicit zero-extension here, or (if GUEST_BASE == 0)
1640 use the ADDR32 prefix. For now, do nothing. */
1641
1642 if (offset != GUEST_BASE) {
1643 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1644 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1645 base = TCG_REG_RDI, offset = 0;
1646 }
1647 }
1648
1649 tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
1650 }
1651#endif
1652}
1653
1654static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1655 const TCGArg *args, const int *const_args)
1656{
1657 int c, rexw = 0;
1658
1659#if TCG_TARGET_REG_BITS == 64
1660# define OP_32_64(x) \
1661 case glue(glue(INDEX_op_, x), _i64): \
1662 rexw = P_REXW; /* FALLTHRU */ \
1663 case glue(glue(INDEX_op_, x), _i32)
1664#else
1665# define OP_32_64(x) \
1666 case glue(glue(INDEX_op_, x), _i32)
1667#endif
1668
1669 switch(opc) {
1670 case INDEX_op_exit_tb:
1671 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1672 tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
1673 break;
1674 case INDEX_op_goto_tb:
1675 if (s->tb_jmp_offset) {
1676 /* direct jump method */
1677 tcg_out8(s, OPC_JMP_long); /* jmp im */
1678 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1679 tcg_out32(s, 0);
1680 } else {
1681 /* indirect jump method */
1682 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1683 (tcg_target_long)(s->tb_next + args[0]));
1684 }
1685 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1686 break;
1687 case INDEX_op_call:
1688 if (const_args[0]) {
1689 tcg_out_calli(s, args[0]);
1690 } else {
1691 /* call *reg */
1692 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1693 }
1694 break;
1695 case INDEX_op_jmp:
1696 if (const_args[0]) {
1697 tcg_out_jmp(s, args[0]);
1698 } else {
1699 /* jmp *reg */
1700 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
1701 }
1702 break;
1703 case INDEX_op_br:
1704 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1705 break;
1706 case INDEX_op_movi_i32:
1707 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1708 break;
1709 OP_32_64(ld8u):
1710 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1711 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1712 break;
1713 OP_32_64(ld8s):
1714 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1715 break;
1716 OP_32_64(ld16u):
1717 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1718 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1719 break;
1720 OP_32_64(ld16s):
1721 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1722 break;
1723#if TCG_TARGET_REG_BITS == 64
1724 case INDEX_op_ld32u_i64:
1725#endif
1726 case INDEX_op_ld_i32:
1727 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1728 break;
1729
1730 OP_32_64(st8):
1731 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1732 args[0], args[1], args[2]);
1733 break;
1734 OP_32_64(st16):
1735 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1736 args[0], args[1], args[2]);
1737 break;
1738#if TCG_TARGET_REG_BITS == 64
1739 case INDEX_op_st32_i64:
1740#endif
1741 case INDEX_op_st_i32:
1742 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1743 break;
1744
1745 OP_32_64(add):
1746 /* For 3-operand addition, use LEA. */
1747 if (args[0] != args[1]) {
1748 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1749
1750 if (const_args[2]) {
1751 c3 = a2, a2 = -1;
1752 } else if (a0 == a2) {
1753 /* Watch out for dest = src + dest, since we've removed
1754 the matching constraint on the add. */
1755 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1756 break;
1757 }
1758
1759 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1760 break;
1761 }
1762 c = ARITH_ADD;
1763 goto gen_arith;
1764 OP_32_64(sub):
1765 c = ARITH_SUB;
1766 goto gen_arith;
1767 OP_32_64(and):
1768 c = ARITH_AND;
1769 goto gen_arith;
1770 OP_32_64(or):
1771 c = ARITH_OR;
1772 goto gen_arith;
1773 OP_32_64(xor):
1774 c = ARITH_XOR;
1775 goto gen_arith;
1776 gen_arith:
1777 if (const_args[2]) {
1778 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1779 } else {
1780 tgen_arithr(s, c + rexw, args[0], args[2]);
1781 }
1782 break;
1783
1784 OP_32_64(mul):
1785 if (const_args[2]) {
1786 int32_t val;
1787 val = args[2];
1788 if (val == (int8_t)val) {
1789 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1790 tcg_out8(s, val);
1791 } else {
1792 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1793 tcg_out32(s, val);
1794 }
1795 } else {
1796 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1797 }
1798 break;
1799
1800 OP_32_64(div2):
1801 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1802 break;
1803 OP_32_64(divu2):
1804 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1805 break;
1806
1807 OP_32_64(shl):
1808 c = SHIFT_SHL;
1809 goto gen_shift;
1810 OP_32_64(shr):
1811 c = SHIFT_SHR;
1812 goto gen_shift;
1813 OP_32_64(sar):
1814 c = SHIFT_SAR;
1815 goto gen_shift;
1816 OP_32_64(rotl):
1817 c = SHIFT_ROL;
1818 goto gen_shift;
1819 OP_32_64(rotr):
1820 c = SHIFT_ROR;
1821 goto gen_shift;
1822 gen_shift:
1823 if (const_args[2]) {
1824 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1825 } else {
1826 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1827 }
1828 break;
1829
1830 case INDEX_op_brcond_i32:
1831 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1832 args[3], 0);
1833 break;
1834 case INDEX_op_setcond_i32:
1835 tcg_out_setcond32(s, args[3], args[0], args[1],
1836 args[2], const_args[2]);
1837 break;
1838
1839 OP_32_64(bswap16):
1840 tcg_out_rolw_8(s, args[0]);
1841 break;
1842 OP_32_64(bswap32):
1843 tcg_out_bswap32(s, args[0]);
1844 break;
1845
1846 OP_32_64(neg):
1847 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1848 break;
1849 OP_32_64(not):
1850 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1851 break;
1852
1853 OP_32_64(ext8s):
1854 tcg_out_ext8s(s, args[0], args[1], rexw);
1855 break;
1856 OP_32_64(ext16s):
1857 tcg_out_ext16s(s, args[0], args[1], rexw);
1858 break;
1859 OP_32_64(ext8u):
1860 tcg_out_ext8u(s, args[0], args[1]);
1861 break;
1862 OP_32_64(ext16u):
1863 tcg_out_ext16u(s, args[0], args[1]);
1864 break;
1865
1866 case INDEX_op_qemu_ld8u:
1867 tcg_out_qemu_ld(s, args, 0);
1868 break;
1869 case INDEX_op_qemu_ld8s:
1870 tcg_out_qemu_ld(s, args, 0 | 4);
1871 break;
1872 case INDEX_op_qemu_ld16u:
1873 tcg_out_qemu_ld(s, args, 1);
1874 break;
1875 case INDEX_op_qemu_ld16s:
1876 tcg_out_qemu_ld(s, args, 1 | 4);
1877 break;
1878#if TCG_TARGET_REG_BITS == 64
1879 case INDEX_op_qemu_ld32u:
1880#endif
1881 case INDEX_op_qemu_ld32:
1882 tcg_out_qemu_ld(s, args, 2);
1883 break;
1884 case INDEX_op_qemu_ld64:
1885 tcg_out_qemu_ld(s, args, 3);
1886 break;
1887
1888 case INDEX_op_qemu_st8:
1889 tcg_out_qemu_st(s, args, 0);
1890 break;
1891 case INDEX_op_qemu_st16:
1892 tcg_out_qemu_st(s, args, 1);
1893 break;
1894 case INDEX_op_qemu_st32:
1895 tcg_out_qemu_st(s, args, 2);
1896 break;
1897 case INDEX_op_qemu_st64:
1898 tcg_out_qemu_st(s, args, 3);
1899 break;
1900
1901#if TCG_TARGET_REG_BITS == 32
1902 case INDEX_op_brcond2_i32:
1903 tcg_out_brcond2(s, args, const_args, 0);
1904 break;
1905 case INDEX_op_setcond2_i32:
1906 tcg_out_setcond2(s, args, const_args);
1907 break;
1908 case INDEX_op_mulu2_i32:
1909 tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
1910 break;
1911 case INDEX_op_add2_i32:
1912 if (const_args[4]) {
1913 tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
1914 } else {
1915 tgen_arithr(s, ARITH_ADD, args[0], args[4]);
1916 }
1917 if (const_args[5]) {
1918 tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
1919 } else {
1920 tgen_arithr(s, ARITH_ADC, args[1], args[5]);
1921 }
1922 break;
1923 case INDEX_op_sub2_i32:
1924 if (const_args[4]) {
1925 tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
1926 } else {
1927 tgen_arithr(s, ARITH_SUB, args[0], args[4]);
1928 }
1929 if (const_args[5]) {
1930 tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
1931 } else {
1932 tgen_arithr(s, ARITH_SBB, args[1], args[5]);
1933 }
1934 break;
1935#else /* TCG_TARGET_REG_BITS == 64 */
1936 case INDEX_op_movi_i64:
1937 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1938 break;
1939 case INDEX_op_ld32s_i64:
1940 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1941 break;
1942 case INDEX_op_ld_i64:
1943 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1944 break;
1945 case INDEX_op_st_i64:
1946 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1947 break;
1948 case INDEX_op_qemu_ld32s:
1949 tcg_out_qemu_ld(s, args, 2 | 4);
1950 break;
1951
1952 case INDEX_op_brcond_i64:
1953 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1954 args[3], 0);
1955 break;
1956 case INDEX_op_setcond_i64:
1957 tcg_out_setcond64(s, args[3], args[0], args[1],
1958 args[2], const_args[2]);
1959 break;
1960
1961 case INDEX_op_bswap64_i64:
1962 tcg_out_bswap64(s, args[0]);
1963 break;
1964 case INDEX_op_ext32u_i64:
1965 tcg_out_ext32u(s, args[0], args[1]);
1966 break;
1967 case INDEX_op_ext32s_i64:
1968 tcg_out_ext32s(s, args[0], args[1]);
1969 break;
1970#endif
1971
1972 default:
1973 tcg_abort();
1974 }
1975
1976#undef OP_32_64
1977}
1978
1979static const TCGTargetOpDef x86_op_defs[] = {
1980 { INDEX_op_exit_tb, { } },
1981 { INDEX_op_goto_tb, { } },
1982 { INDEX_op_call, { "ri" } },
1983 { INDEX_op_jmp, { "ri" } },
1984 { INDEX_op_br, { } },
1985 { INDEX_op_mov_i32, { "r", "r" } },
1986 { INDEX_op_movi_i32, { "r" } },
1987 { INDEX_op_ld8u_i32, { "r", "r" } },
1988 { INDEX_op_ld8s_i32, { "r", "r" } },
1989 { INDEX_op_ld16u_i32, { "r", "r" } },
1990 { INDEX_op_ld16s_i32, { "r", "r" } },
1991 { INDEX_op_ld_i32, { "r", "r" } },
1992 { INDEX_op_st8_i32, { "q", "r" } },
1993 { INDEX_op_st16_i32, { "r", "r" } },
1994 { INDEX_op_st_i32, { "r", "r" } },
1995
1996 { INDEX_op_add_i32, { "r", "r", "ri" } },
1997 { INDEX_op_sub_i32, { "r", "0", "ri" } },
1998 { INDEX_op_mul_i32, { "r", "0", "ri" } },
1999 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2000 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2001 { INDEX_op_and_i32, { "r", "0", "ri" } },
2002 { INDEX_op_or_i32, { "r", "0", "ri" } },
2003 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2004
2005 { INDEX_op_shl_i32, { "r", "0", "ci" } },
2006 { INDEX_op_shr_i32, { "r", "0", "ci" } },
2007 { INDEX_op_sar_i32, { "r", "0", "ci" } },
2008 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2009 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2010
2011 { INDEX_op_brcond_i32, { "r", "ri" } },
2012
2013 { INDEX_op_bswap16_i32, { "r", "0" } },
2014 { INDEX_op_bswap32_i32, { "r", "0" } },
2015
2016 { INDEX_op_neg_i32, { "r", "0" } },
2017
2018 { INDEX_op_not_i32, { "r", "0" } },
2019
2020 { INDEX_op_ext8s_i32, { "r", "q" } },
2021 { INDEX_op_ext16s_i32, { "r", "r" } },
2022 { INDEX_op_ext8u_i32, { "r", "q" } },
2023 { INDEX_op_ext16u_i32, { "r", "r" } },
2024
2025 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2026
2027#if TCG_TARGET_REG_BITS == 32
2028 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2029 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2030 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2031 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2032 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2033#else
2034 { INDEX_op_mov_i64, { "r", "r" } },
2035 { INDEX_op_movi_i64, { "r" } },
2036 { INDEX_op_ld8u_i64, { "r", "r" } },
2037 { INDEX_op_ld8s_i64, { "r", "r" } },
2038 { INDEX_op_ld16u_i64, { "r", "r" } },
2039 { INDEX_op_ld16s_i64, { "r", "r" } },
2040 { INDEX_op_ld32u_i64, { "r", "r" } },
2041 { INDEX_op_ld32s_i64, { "r", "r" } },
2042 { INDEX_op_ld_i64, { "r", "r" } },
2043 { INDEX_op_st8_i64, { "r", "r" } },
2044 { INDEX_op_st16_i64, { "r", "r" } },
2045 { INDEX_op_st32_i64, { "r", "r" } },
2046 { INDEX_op_st_i64, { "r", "r" } },
2047
2048 { INDEX_op_add_i64, { "r", "0", "re" } },
2049 { INDEX_op_mul_i64, { "r", "0", "re" } },
2050 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2051 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2052 { INDEX_op_sub_i64, { "r", "0", "re" } },
2053 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2054 { INDEX_op_or_i64, { "r", "0", "re" } },
2055 { INDEX_op_xor_i64, { "r", "0", "re" } },
2056
2057 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2058 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2059 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2060 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2061 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2062
2063 { INDEX_op_brcond_i64, { "r", "re" } },
2064 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2065
2066 { INDEX_op_bswap16_i64, { "r", "0" } },
2067 { INDEX_op_bswap32_i64, { "r", "0" } },
2068 { INDEX_op_bswap64_i64, { "r", "0" } },
2069 { INDEX_op_neg_i64, { "r", "0" } },
2070 { INDEX_op_not_i64, { "r", "0" } },
2071
2072 { INDEX_op_ext8s_i64, { "r", "r" } },
2073 { INDEX_op_ext16s_i64, { "r", "r" } },
2074 { INDEX_op_ext32s_i64, { "r", "r" } },
2075 { INDEX_op_ext8u_i64, { "r", "r" } },
2076 { INDEX_op_ext16u_i64, { "r", "r" } },
2077 { INDEX_op_ext32u_i64, { "r", "r" } },
2078#endif
2079
2080#if TCG_TARGET_REG_BITS == 64
2081 { INDEX_op_qemu_ld8u, { "r", "L" } },
2082 { INDEX_op_qemu_ld8s, { "r", "L" } },
2083 { INDEX_op_qemu_ld16u, { "r", "L" } },
2084 { INDEX_op_qemu_ld16s, { "r", "L" } },
2085 { INDEX_op_qemu_ld32, { "r", "L" } },
2086 { INDEX_op_qemu_ld32u, { "r", "L" } },
2087 { INDEX_op_qemu_ld32s, { "r", "L" } },
2088 { INDEX_op_qemu_ld64, { "r", "L" } },
2089
2090 { INDEX_op_qemu_st8, { "L", "L" } },
2091 { INDEX_op_qemu_st16, { "L", "L" } },
2092 { INDEX_op_qemu_st32, { "L", "L" } },
2093 { INDEX_op_qemu_st64, { "L", "L" } },
2094#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2095 { INDEX_op_qemu_ld8u, { "r", "L" } },
2096 { INDEX_op_qemu_ld8s, { "r", "L" } },
2097 { INDEX_op_qemu_ld16u, { "r", "L" } },
2098 { INDEX_op_qemu_ld16s, { "r", "L" } },
2099 { INDEX_op_qemu_ld32, { "r", "L" } },
2100 { INDEX_op_qemu_ld64, { "r", "r", "L" } },
2101
2102 { INDEX_op_qemu_st8, { "cb", "L" } },
2103 { INDEX_op_qemu_st16, { "L", "L" } },
2104 { INDEX_op_qemu_st32, { "L", "L" } },
2105 { INDEX_op_qemu_st64, { "L", "L", "L" } },
2106#else
2107 { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
2108 { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
2109 { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
2110 { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
2111 { INDEX_op_qemu_ld32, { "r", "L", "L" } },
2112 { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
2113
2114 { INDEX_op_qemu_st8, { "cb", "L", "L" } },
2115 { INDEX_op_qemu_st16, { "L", "L", "L" } },
2116 { INDEX_op_qemu_st32, { "L", "L", "L" } },
2117 { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
2118#endif
2119 { -1 },
2120};
2121
2122static int tcg_target_callee_save_regs[] = {
2123#if TCG_TARGET_REG_BITS == 64
2124 TCG_REG_RBP,
2125 TCG_REG_RBX,
2126# if defined(VBOX) && defined(__MINGW64__)
2127 TCG_REG_RSI,
2128 TCG_REG_RDI,
2129# endif
2130 TCG_REG_R12,
2131 TCG_REG_R13,
2132 /* TCG_REG_R14, */ /* Currently used for the global env. */
2133 TCG_REG_R15,
2134#else
2135# ifndef VBOX
2136 /* TCG_REG_EBP, */ /* Currently used for the global env. */
2137 TCG_REG_EBX,
2138 TCG_REG_ESI,
2139 TCG_REG_EDI,
2140# else
2141 TCG_REG_EBP,
2142 TCG_REG_EBX,
2143 /* TCG_REG_ESI, */ /* Currently used for the global env. */
2144 TCG_REG_EDI,
2145# endif
2146#endif
2147};
2148
2149/* Generate global QEMU prologue and epilogue code */
2150static void tcg_target_qemu_prologue(TCGContext *s)
2151{
2152 int i, frame_size, push_size, stack_addend;
2153
2154 /* TB prologue */
2155
2156 /* Save all callee saved registers. */
2157 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2158 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2159 }
2160# if defined(VBOX_STRICT) && defined(RT_ARCH_X86)
2161 tcg_out8(s, 0x31); /* xor ebp, ebp */
2162 tcg_out8(s, 0xed);
2163# endif
2164
2165 /* Reserve some stack space. */
2166 push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
2167 push_size *= TCG_TARGET_REG_BITS / 8;
2168
2169 frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
2170#if defined(VBOX) && defined(__MINGW64__)
2171 frame_size += TCG_TARGET_CALL_STACK_OFFSET;
2172#endif
2173 frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
2174 ~(TCG_TARGET_STACK_ALIGN - 1);
2175 stack_addend = frame_size - push_size;
2176 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2177
2178 /* jmp *tb. */
2179 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
2180# ifdef VBOX
2181 tcg_gen_stack_alignment_check(s);
2182# endif
2183
2184 tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */
2185
2186 /* TB epilogue */
2187 tb_ret_addr = s->code_ptr;
2188
2189 tcg_out_addi(s, TCG_REG_ESP, stack_addend);
2190
2191 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2192 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2193 }
2194 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2195}
2196
2197static void tcg_target_init(TCGContext *s)
2198{
2199#if !defined(CONFIG_USER_ONLY)
2200 /* fail safe */
2201 if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
2202 tcg_abort();
2203#endif
2204
2205 if (TCG_TARGET_REG_BITS == 64) {
2206 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2207 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2208 } else {
2209 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2210 }
2211
2212 tcg_regset_clear(tcg_target_call_clobber_regs);
2213 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2214 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2215 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2216 if (TCG_TARGET_REG_BITS == 64) {
2217# if !defined(VBOX) || !defined(__MINGW64__)
2218 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2219 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2220# endif
2221 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2222 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2223 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2224 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2225 }
2226
2227 tcg_regset_clear(s->reserved_regs);
2228 tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
2229
2230 tcg_add_target_add_op_defs(x86_op_defs);
2231}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette