VirtualBox

source: vbox/trunk/src/recompiler/tcg/i386/tcg-target.c @ 37697

/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};

static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};

static uint8_t *tb_ret_addr;

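/* Patch a relocation in code emitted earlier.  For the PC-relative types
   the ADDEND passed to tcg_out_reloc compensates for the distance between
   the displacement field and the end of the instruction. */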
static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}

#ifdef VBOX
/* emits stack alignment checks for strict builds. */
DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s)
{
# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */
    tcg_out8(s, 0xf7); tcg_out8(s, 0xc4);   /* test %esp, 1fh */
    tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1);
    tcg_out8(s, 0x74);                      /* jz imm8 */
    tcg_out8(s, 1);                         /* $+3 (over int3) */
    tcg_out8(s, 0xcc);                      /* int3 */
# else
    NOREF(s);
# endif
}
#endif /* VBOX */

/* Maximum number of registers used for input function arguments. */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    flags &= TCG_CALL_TYPE_MASK;
    switch(flags) {
    case TCG_CALL_TYPE_STD:
        return 0;
    case TCG_CALL_TYPE_REGPARM_1:
    case TCG_CALL_TYPE_REGPARM_2:
    case TCG_CALL_TYPE_REGPARM:
        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
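    /* 'q' selects a register usable as a byte operand: any register in
       64-bit mode, but only %eax, %ecx, %edx, %ebx in 32-bit mode. */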
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

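/* In 64-bit mode only the low three bits of a register number fit in the
   ModRM/SIB bytes; the fourth bit travels in the REX prefix (see
   tcg_out_opc).  In 32-bit mode the register number is used as-is. */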
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT       0x100   /* 0x0f opcode prefix */
#define P_DATA16    0x200   /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32   0x400   /* 0x67 opcode prefix */
# define P_REXW     0x800   /* Set REX.W = 1 */
# define P_REXB_R   0x1000  /* REG field as byte register */
# define P_REXB_RM  0x2000  /* R/M field as byte register */
#else
# define P_ADDR32   0
# define P_REXW     0
# define P_REXB_R   0
# define P_REXB_RM  0
#endif

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)      /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)      /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)      /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)      /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM)  /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3. */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5. */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4

/* Condition codes to be added to OPC_JCC_{long,short}. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if defined(VBOX)
/* Calc the size of the tcg_out_opc() result. */
static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x)
{
    unsigned char len = 1;
# if TCG_TARGET_REG_BITS == 64
    unsigned rex;
    rex = 0;
    rex |= (opc & P_REXW) >> 8;     /* REX.W */
    rex |= (r & 8) >> 1;            /* REX.R */
    rex |= (x & 8) >> 2;            /* REX.X */
    rex |= (rm & 8) >> 3;           /* REX.B */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
    if (rex) len++;
    if (opc & P_ADDR32) len++;
# endif
    if (opc & P_DATA16) len++;
    if (opc & P_EXT) len++;

    return len;
}
#endif

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation. */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;     /* REX.W */
    rex |= (r & 8) >> 1;            /* REX.R */
    rex |= (x & 8) >> 2;            /* REX.X */
    rex |= (rm & 8) >> 3;           /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output. */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them. */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

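/* Emit OPC with a ModRM byte in register-direct form (mod = 3):
   R goes in the reg field, RM in the r/m field. */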
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction. */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding. */
#ifdef VBOX
            tcg_target_long pc = (tcg_target_long)s->code_ptr
                               + tcg_calc_opc_len(s, opc, r, 0, 0) + 1 + 4;
#else
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
#endif
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
#ifdef VBOX
                Assert(pc == (tcg_target_long)s->code_ptr);
#endif
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing. */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable. */
            tcg_abort();
        } else {
            /* Absolute address. */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing. */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form. */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format. */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format. */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index. */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift. */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi. */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW. */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
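        /* Emit the high half of the 64-bit immediate.  The shift is split
           in two so the expression stays well-defined even when
           tcg_target_long is only 32 bits wide. */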
        tcg_out32(s, arg >> 31 >> 1);
    }
}

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16. */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends. */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

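    /* C arrives with prefix flags (e.g. P_REXW) ORed into its high bits;
       split those into rexw and keep only the 3-bit ARITH_* code. */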
    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals. */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding. */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation. */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

#ifdef VBOX
static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0);
    }
}
#endif

/* Use SMALL != 0 to force a short forward branch. */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

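    /* The short forms are 2 bytes (opcode + rel8); the long forms are
       5 bytes for JMP (opcode + rel32) and 6 for Jcc (0x0f + opcode +
       rel32), hence the val - 2 / val - 5 / val - 6 adjustments below. */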
    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle temporaries that live across basic blocks */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky. */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code. */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
#endif

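/* Emit a direct CALL (call != 0) or JMP to DEST when it lies within rel32
   range of the current code pointer; otherwise (64-bit only) load DEST
   into a scratch register and branch through it. */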
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
#ifdef VBOX
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr
                         - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0)
                         - 4;
#else
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
#endif

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
#ifdef VBOX
    tcg_gen_stack_alignment_check(s);
#endif
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

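/* Softmmu load/store helpers, indexed by log2 of the access size. */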
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDR_LOW_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered. */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit. */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
#endif

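/* SIZEOP encodes the access: bits 0-1 are the log2 of the size,
   bit 2 requests sign extension of the loaded value. */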
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)

static void * const vbox_ld_helpers[] = {
    __ldub_vbox_phys,
    __lduw_vbox_phys,
    __ldul_vbox_phys,
    __ldq_vbox_phys,
    __ldb_vbox_phys,
    __ldw_vbox_phys,
    __ldl_vbox_phys,
    __ldq_vbox_phys,
};

static void * const vbox_st_helpers[] = {
    __stb_vbox_phys,
    __stw_vbox_phys,
    __stl_vbox_phys,
    __stq_vbox_phys
};

DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst)
{
    intptr_t disp;
# ifdef VBOX
    tcg_gen_stack_alignment_check(s);
# endif
    disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5;
    tcg_out8(s, 0xe8);      /* call disp32 */
    tcg_out32(s, disp);     /* disp32 */
}

static void tcg_out_vbox_phys_read(TCGContext *s, int index,
                                   int addr_reg,
                                   int data_reg, int data_reg2)
{
    int useReg2 = ((index & 3) == 3);

    /** @todo: should we make phys address accessors fastcalls - probably not a big deal */
    /* out parameter (address), note that phys address is always 64-bit */
    AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));

# if 0
    tcg_out8(s, 0x6a); tcg_out8(s, 0x00);   /* push $0 */
    tcg_out_push(s, addr_reg);
# else
    /* mov addr_reg, %eax */
    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EAX, addr_reg);
# endif

    tcg_out_long_call(s, vbox_ld_helpers[index]);

    /* mov %eax, data_reg */
    tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);

    /* returned 64-bit value */
    if (useReg2)
        tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
}

static void tcg_out_vbox_phys_write(TCGContext *s, int index,
                                    int addr_reg,
                                    int val_reg, int val_reg2) {
    int useReg2 = ((index & 3) == 3);

# if 0
    /* out parameter (value2) */
    if (useReg2)
        tcg_out_push(s, val_reg2);
    /* out parameter (value) */
    tcg_out_push(s, val_reg);
    /* out parameter (address), note that phys address is always 64-bit */
    AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
    tcg_out8(s, 0x6a); tcg_out8(s, 0x00);   /* push $0 */
    tcg_out_push(s, addr_reg);
# else
    Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX)));
    /* mov addr_reg, %eax */
    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EAX, addr_reg);
    Assert(!useReg2 || (val_reg2 != TCG_REG_EDX));
    /* mov val_reg, %edx */
    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, val_reg);
    if (useReg2)
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, val_reg2);

# endif
    /* call it */
    tcg_out_long_call(s, vbox_st_helpers[index]);

    /* clean stack after us */
# if 0
    tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4));
# endif
}

#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed registers globals are less
   common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit. */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss. */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* The first argument is already loaded with addrlo. */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing. */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here. */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
#else  /* VBOX */
# error "broken"
    tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2);
#endif
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit. */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss. */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)

    /* XXX: move that code at the end of the TB */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add. */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }

#else  /* VBOX && REM_PHYS_ADDR_IN_TLB */
# error Borked
    tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2);
#endif /* VBOX && REM_PHYS_ADDR_IN_TLB */

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing. */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
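            /* Write a zero displacement for now; it is patched in place
               when this TB is later chained to its successor. */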
1629 tcg_out32(s, 0);
1630 } else {
1631 /* indirect jump method */
1632 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1633 (tcg_target_long)(s->tb_next + args[0]));
1634 }
1635 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1636 break;
1637 case INDEX_op_call:
1638 if (const_args[0]) {
1639 tcg_out_calli(s, args[0]);
1640 } else {
1641 /* call *reg */
1642 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1643 }
1644 break;
1645 case INDEX_op_jmp:
1646 if (const_args[0]) {
1647 tcg_out_jmp(s, args[0]);
1648 } else {
1649 /* jmp *reg */
1650 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
1651 }
1652 break;
1653 case INDEX_op_br:
1654 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1655 break;
1656 case INDEX_op_movi_i32:
1657 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1658 break;
1659 OP_32_64(ld8u):
1660 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1661 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1662 break;
1663 OP_32_64(ld8s):
1664 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1665 break;
1666 OP_32_64(ld16u):
1667 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1668 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1669 break;
1670 OP_32_64(ld16s):
1671 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1672 break;
1673#if TCG_TARGET_REG_BITS == 64
1674 case INDEX_op_ld32u_i64:
1675#endif
1676 case INDEX_op_ld_i32:
1677 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1678 break;
1679
1680 OP_32_64(st8):
1681 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1682 args[0], args[1], args[2]);
1683 break;
1684 OP_32_64(st16):
1685 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1686 args[0], args[1], args[2]);
1687 break;
1688#if TCG_TARGET_REG_BITS == 64
1689 case INDEX_op_st32_i64:
1690#endif
1691 case INDEX_op_st_i32:
1692 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1693 break;
1694
1695 OP_32_64(add):
1696 /* For 3-operand addition, use LEA. */
1697 if (args[0] != args[1]) {
1698 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1699
1700 if (const_args[2]) {
1701 c3 = a2, a2 = -1;
1702 } else if (a0 == a2) {
1703 /* Watch out for dest = src + dest, since we've removed
1704 the matching constraint on the add. */
1705 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1706 break;
1707 }
1708
1709 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1710 break;
1711 }
1712 c = ARITH_ADD;
1713 goto gen_arith;
1714 OP_32_64(sub):
1715 c = ARITH_SUB;
1716 goto gen_arith;
1717 OP_32_64(and):
1718 c = ARITH_AND;
1719 goto gen_arith;
1720 OP_32_64(or):
1721 c = ARITH_OR;
1722 goto gen_arith;
1723 OP_32_64(xor):
1724 c = ARITH_XOR;
1725 goto gen_arith;
1726 gen_arith:
1727 if (const_args[2]) {
1728 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1729 } else {
1730 tgen_arithr(s, c + rexw, args[0], args[2]);
1731 }
1732 break;
1733
1734 OP_32_64(mul):
1735 if (const_args[2]) {
1736 int32_t val;
1737 val = args[2];
1738 if (val == (int8_t)val) {
1739 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1740 tcg_out8(s, val);
1741 } else {
1742 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1743 tcg_out32(s, val);
1744 }
1745 } else {
1746 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1747 }
1748 break;
1749
1750 OP_32_64(div2):
1751 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1752 break;
1753 OP_32_64(divu2):
1754 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1755 break;
1756
1757 OP_32_64(shl):
1758 c = SHIFT_SHL;
1759 goto gen_shift;
1760 OP_32_64(shr):
1761 c = SHIFT_SHR;
1762 goto gen_shift;
1763 OP_32_64(sar):
1764 c = SHIFT_SAR;
1765 goto gen_shift;
1766 OP_32_64(rotl):
1767 c = SHIFT_ROL;
1768 goto gen_shift;
1769 OP_32_64(rotr):
1770 c = SHIFT_ROR;
1771 goto gen_shift;
1772 gen_shift:
1773 if (const_args[2]) {
1774 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1775 } else {
1776 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1777 }
1778 break;
1779
1780 case INDEX_op_brcond_i32:
1781 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1782 args[3], 0);
1783 break;
1784 case INDEX_op_setcond_i32:
1785 tcg_out_setcond32(s, args[3], args[0], args[1],
1786 args[2], const_args[2]);
1787 break;
1788
1789 OP_32_64(bswap16):
1790 tcg_out_rolw_8(s, args[0]);
1791 break;
1792 OP_32_64(bswap32):
1793 tcg_out_bswap32(s, args[0]);
1794 break;
1795
1796 OP_32_64(neg):
1797 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1798 break;
1799 OP_32_64(not):
1800 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1801 break;
1802
1803 OP_32_64(ext8s):
1804 tcg_out_ext8s(s, args[0], args[1], rexw);
1805 break;
1806 OP_32_64(ext16s):
1807 tcg_out_ext16s(s, args[0], args[1], rexw);
1808 break;
1809 OP_32_64(ext8u):
1810 tcg_out_ext8u(s, args[0], args[1]);
1811 break;
1812 OP_32_64(ext16u):
1813 tcg_out_ext16u(s, args[0], args[1]);
1814 break;
1815
1816 case INDEX_op_qemu_ld8u:
1817 tcg_out_qemu_ld(s, args, 0);
1818 break;
1819 case INDEX_op_qemu_ld8s:
1820 tcg_out_qemu_ld(s, args, 0 | 4);
1821 break;
1822 case INDEX_op_qemu_ld16u:
1823 tcg_out_qemu_ld(s, args, 1);
1824 break;
1825 case INDEX_op_qemu_ld16s:
1826 tcg_out_qemu_ld(s, args, 1 | 4);
1827 break;
1828#if TCG_TARGET_REG_BITS == 64
1829 case INDEX_op_qemu_ld32u:
1830#endif
1831 case INDEX_op_qemu_ld32:
1832 tcg_out_qemu_ld(s, args, 2);
1833 break;
1834 case INDEX_op_qemu_ld64:
1835 tcg_out_qemu_ld(s, args, 3);
1836 break;
1837
1838 case INDEX_op_qemu_st8:
1839 tcg_out_qemu_st(s, args, 0);
1840 break;
1841 case INDEX_op_qemu_st16:
1842 tcg_out_qemu_st(s, args, 1);
1843 break;
1844 case INDEX_op_qemu_st32:
1845 tcg_out_qemu_st(s, args, 2);
1846 break;
1847 case INDEX_op_qemu_st64:
1848 tcg_out_qemu_st(s, args, 3);
1849 break;
1850
1851#if TCG_TARGET_REG_BITS == 32
1852 case INDEX_op_brcond2_i32:
1853 tcg_out_brcond2(s, args, const_args, 0);
1854 break;
1855 case INDEX_op_setcond2_i32:
1856 tcg_out_setcond2(s, args, const_args);
1857 break;
1858 case INDEX_op_mulu2_i32:
1859 tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
1860 break;
1861 case INDEX_op_add2_i32:
1862 if (const_args[4]) {
1863 tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
1864 } else {
1865 tgen_arithr(s, ARITH_ADD, args[0], args[4]);
1866 }
1867 if (const_args[5]) {
1868 tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
1869 } else {
1870 tgen_arithr(s, ARITH_ADC, args[1], args[5]);
1871 }
1872 break;
1873 case INDEX_op_sub2_i32:
1874 if (const_args[4]) {
1875 tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
1876 } else {
1877 tgen_arithr(s, ARITH_SUB, args[0], args[4]);
1878 }
1879 if (const_args[5]) {
1880 tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
1881 } else {
1882 tgen_arithr(s, ARITH_SBB, args[1], args[5]);
1883 }
1884 break;
1885#else /* TCG_TARGET_REG_BITS == 64 */
1886 case INDEX_op_movi_i64:
1887 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1888 break;
1889 case INDEX_op_ld32s_i64:
1890 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1891 break;
1892 case INDEX_op_ld_i64:
1893 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1894 break;
1895 case INDEX_op_st_i64:
1896 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1897 break;
1898 case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    default:
        tcg_abort();
    }

#undef OP_32_64
}

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};
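/* Rough legend for the constraint letters above, as parsed by
   target_parse_constraint() earlier in this file: "r" is any general
   register; "q" a byte-addressable one (EAX/EBX/ECX/EDX on 32-bit hosts);
   "a"/"b"/"c"/"d" pin EAX/EBX/ECX/EDX; "L" restricts to registers that are
   safe to use around the qemu_ld/st helper calls; "0"/"1" must match the
   like-numbered output operand; "i" allows any immediate, "e" a
   sign-extended and "Z" a zero-extended 32-bit immediate on 64-bit hosts.
   Letters within one string are alternatives, so "ci" means ECX or an
   immediate and "cb" means ECX or EBX. */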

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    /* TCG_REG_R14, */ /* Currently used for the global env. */
    TCG_REG_R15,
#else
# ifndef VBOX
    /* TCG_REG_EBP, */ /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
# else
    TCG_REG_EBP,
    TCG_REG_EBX,
    /* TCG_REG_ESI, */ /* Currently used for the global env. */
    TCG_REG_EDI,
# endif
#endif
};

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, frame_size, push_size, stack_addend;

    /* TB prologue */

    /* Save all callee saved registers. */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }
#if defined(VBOX_STRICT) && defined(RT_ARCH_X86)
    tcg_out8(s, 0x31);          /* xor %ebp, %ebp */
    tcg_out8(s, 0xed);
#endif

    /* Reserve some stack space. */
    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
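    /* The "1 +" above accounts for the return address that our caller's
       CALL instruction pushed on top of the callee-saved registers. */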
    push_size *= TCG_TARGET_REG_BITS / 8;

    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    stack_addend = frame_size - push_size;
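    /* Worked example (hypothetical values: TCG_STATIC_CALL_ARGS_SIZE == 128,
       TCG_TARGET_STACK_ALIGN == 16): on a 64-bit host the five pushes plus
       the return address occupy 6 * 8 = 48 bytes, frame_size rounds up to
       176, so stack_addend reserves the remaining 128 bytes below. */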
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);

# ifdef VBOX
    tcg_gen_stack_alignment_check(s);
# endif

    /* jmp *tb.  The first call argument register holds the TB pointer. */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_ESP, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
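/* The emitted prologue/epilogue, roughly (64-bit host shown):
 *     push %rbp; push %rbx; push %r12; push %r13; push %r15
 *     sub  $stack_addend, %rsp
 *     jmp  *%rdi                  ; enter the translation block
 * tb_ret_addr:
 *     add  $stack_addend, %rsp
 *     pop  %r15; pop %r13; pop %r12; pop %rbx; pop %rbp
 *     ret
 */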

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* fail safe */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
        tcg_abort();
#endif

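    /* 0xffff / 0xff below: bitmasks covering all 16 general registers on a
       64-bit host, or the 8 on a 32-bit host. */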
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);

    tcg_add_target_add_op_defs(x86_op_defs);
}