VirtualBox

source: vbox/trunk/src/recompiler/tcg/i386/tcg-target.c @ 37689

Last change on this file since 37689 was 37689, checked in by vboxsync, 13 years ago

recompiler: Merged in changes from 0.13.0.

  • Property svn:eol-style set to native
File size: 64.7 KB
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#ifndef NDEBUG
26static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
27#if TCG_TARGET_REG_BITS == 64
28 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
30#else
31 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
32#endif
33};
34#endif
35
36static const int tcg_target_reg_alloc_order[] = {
37#if TCG_TARGET_REG_BITS == 64
38 TCG_REG_RBP,
39 TCG_REG_RBX,
40 TCG_REG_R12,
41 TCG_REG_R13,
42 TCG_REG_R14,
43 TCG_REG_R15,
44 TCG_REG_R10,
45 TCG_REG_R11,
46 TCG_REG_R9,
47 TCG_REG_R8,
48 TCG_REG_RCX,
49 TCG_REG_RDX,
50 TCG_REG_RSI,
51 TCG_REG_RDI,
52 TCG_REG_RAX,
53#else
54 TCG_REG_EBX,
55 TCG_REG_ESI,
56 TCG_REG_EDI,
57 TCG_REG_EBP,
58 TCG_REG_ECX,
59 TCG_REG_EDX,
60 TCG_REG_EAX,
61#endif
62};
63
64static const int tcg_target_call_iarg_regs[] = {
65#if TCG_TARGET_REG_BITS == 64
66 TCG_REG_RDI,
67 TCG_REG_RSI,
68 TCG_REG_RDX,
69 TCG_REG_RCX,
70 TCG_REG_R8,
71 TCG_REG_R9,
72#else
73 TCG_REG_EAX,
74 TCG_REG_EDX,
75 TCG_REG_ECX
76#endif
77};
78
79static const int tcg_target_call_oarg_regs[2] = {
80 TCG_REG_EAX,
81 TCG_REG_EDX
82};
83
84static uint8_t *tb_ret_addr;
85
86static void patch_reloc(uint8_t *code_ptr, int type,
87 tcg_target_long value, tcg_target_long addend)
88{
89 value += addend;
90 switch(type) {
91 case R_386_PC32:
92 value -= (uintptr_t)code_ptr;
93 if (value != (int32_t)value) {
94 tcg_abort();
95 }
96 *(uint32_t *)code_ptr = value;
97 break;
98 case R_386_PC8:
99 value -= (uintptr_t)code_ptr;
100 if (value != (int8_t)value) {
101 tcg_abort();
102 }
103 *(uint8_t *)code_ptr = value;
104 break;
105 default:
106 tcg_abort();
107 }
108}
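/* Worked example of the relocation fixup above (hypothetical addresses,
   for illustration only): suppose a forward jump was emitted with its
   32-bit displacement field at code_ptr = 0x1000, and the label later
   resolves to value = 0x1234 with addend = -4, the addend accounting
   for the CPU computing the branch relative to the end of the 4-byte
   field.  patch_reloc stores (0x1234 - 4) - 0x1000 = 0x230, and the
   CPU then reaches (0x1000 + 4) + 0x230 = 0x1234 as intended. */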
109
110#ifdef VBOX
111/* emits stack alignment checks for strict builds. */
112DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s)
113{
114# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */
115 tcg_out8(s, 0xf7); tcg_out8(s, 0xc4); /* test %esp, 1fh */
116 tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1);
117 tcg_out8(s, 0x74); /* jz imm8 */
118 tcg_out8(s, 1); /* $+3 (over int3) */
119 tcg_out8(s, 0xcc); /* int3 */
120# else
121 NOREF(s);
122# endif
123}
124#endif /* VBOX */
125
126/* maximum number of registers used for input function arguments */
127static inline int tcg_target_get_call_iarg_regs_count(int flags)
128{
129 if (TCG_TARGET_REG_BITS == 64) {
130 return 6;
131 }
132
133 flags &= TCG_CALL_TYPE_MASK;
134 switch(flags) {
135 case TCG_CALL_TYPE_STD:
136 return 0;
137 case TCG_CALL_TYPE_REGPARM_1:
138 case TCG_CALL_TYPE_REGPARM_2:
139 case TCG_CALL_TYPE_REGPARM:
140 return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
141 default:
142 tcg_abort();
143 }
144}
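/* Illustrative note: on i386 this mirrors gcc's regparm(N) convention.
   TCG_CALL_TYPE_REGPARM_1..TCG_CALL_TYPE_REGPARM map to 1, 2 or 3
   arguments passed in EAX, EDX, ECX (the tcg_target_call_iarg_regs
   order above), while TCG_CALL_TYPE_STD passes everything on the
   stack. */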
145
146/* parse target specific constraints */
147static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
148{
149 const char *ct_str;
150
151 ct_str = *pct_str;
152 switch(ct_str[0]) {
153 case 'a':
154 ct->ct |= TCG_CT_REG;
155 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
156 break;
157 case 'b':
158 ct->ct |= TCG_CT_REG;
159 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
160 break;
161 case 'c':
162 ct->ct |= TCG_CT_REG;
163 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
164 break;
165 case 'd':
166 ct->ct |= TCG_CT_REG;
167 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
168 break;
169 case 'S':
170 ct->ct |= TCG_CT_REG;
171 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
172 break;
173 case 'D':
174 ct->ct |= TCG_CT_REG;
175 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
176 break;
177 case 'q':
178 ct->ct |= TCG_CT_REG;
179 if (TCG_TARGET_REG_BITS == 64) {
180 tcg_regset_set32(ct->u.regs, 0, 0xffff);
181 } else {
182 tcg_regset_set32(ct->u.regs, 0, 0xf);
183 }
184 break;
185 case 'r':
186 ct->ct |= TCG_CT_REG;
187 if (TCG_TARGET_REG_BITS == 64) {
188 tcg_regset_set32(ct->u.regs, 0, 0xffff);
189 } else {
190 tcg_regset_set32(ct->u.regs, 0, 0xff);
191 }
192 break;
193
194 /* qemu_ld/st address constraint */
195 case 'L':
196 ct->ct |= TCG_CT_REG;
197 if (TCG_TARGET_REG_BITS == 64) {
198 tcg_regset_set32(ct->u.regs, 0, 0xffff);
199 tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
200 tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
201 } else {
202 tcg_regset_set32(ct->u.regs, 0, 0xff);
203 tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
204 tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
205 }
206 break;
207
208 case 'e':
209 ct->ct |= TCG_CT_CONST_S32;
210 break;
211 case 'Z':
212 ct->ct |= TCG_CT_CONST_U32;
213 break;
214
215 default:
216 return -1;
217 }
218 ct_str++;
219 *pct_str = ct_str;
220 return 0;
221}
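/* Example of the constraint letters as used in the x86_op_defs table
   near the end of this file: on i386, "q" (mask 0xf) restricts an
   operand to the four byte-addressable registers EAX..EBX, which
   SETCC and MOVZBL need, while "r" (mask 0xff) allows any of the
   eight GPRs; on x86-64 both expand to all sixteen registers (mask
   0xffff).  A string such as "ri" combines a register with the
   generic immediate constraint handled in common TCG code. */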
222
223/* test if a constant matches the constraint */
224static inline int tcg_target_const_match(tcg_target_long val,
225 const TCGArgConstraint *arg_ct)
226{
227 int ct = arg_ct->ct;
228 if (ct & TCG_CT_CONST) {
229 return 1;
230 }
231 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
232 return 1;
233 }
234 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
235 return 1;
236 }
237 return 0;
238}
239
240#if TCG_TARGET_REG_BITS == 64
241# define LOWREGMASK(x) ((x) & 7)
242#else
243# define LOWREGMASK(x) (x)
244#endif
245
246#define P_EXT 0x100 /* 0x0f opcode prefix */
247#define P_DATA16 0x200 /* 0x66 opcode prefix */
248#if TCG_TARGET_REG_BITS == 64
249# define P_ADDR32 0x400 /* 0x67 opcode prefix */
250# define P_REXW 0x800 /* Set REX.W = 1 */
251# define P_REXB_R 0x1000 /* REG field as byte register */
252# define P_REXB_RM 0x2000 /* R/M field as byte register */
253#else
254# define P_ADDR32 0
255# define P_REXW 0
256# define P_REXB_R 0
257# define P_REXB_RM 0
258#endif
259
260#define OPC_ARITH_EvIz (0x81)
261#define OPC_ARITH_EvIb (0x83)
262#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
263#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
264#define OPC_BSWAP (0xc8 | P_EXT)
265#define OPC_CALL_Jz (0xe8)
266#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
267#define OPC_DEC_r32 (0x48)
268#define OPC_IMUL_GvEv (0xaf | P_EXT)
269#define OPC_IMUL_GvEvIb (0x6b)
270#define OPC_IMUL_GvEvIz (0x69)
271#define OPC_INC_r32 (0x40)
272#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
273#define OPC_JCC_short (0x70) /* ... plus condition code */
274#define OPC_JMP_long (0xe9)
275#define OPC_JMP_short (0xeb)
276#define OPC_LEA (0x8d)
277#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
278#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
279#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
280#define OPC_MOVL_EvIz (0xc7)
281#define OPC_MOVL_Iv (0xb8)
282#define OPC_MOVSBL (0xbe | P_EXT)
283#define OPC_MOVSWL (0xbf | P_EXT)
284#define OPC_MOVSLQ (0x63 | P_REXW)
285#define OPC_MOVZBL (0xb6 | P_EXT)
286#define OPC_MOVZWL (0xb7 | P_EXT)
287#define OPC_POP_r32 (0x58)
288#define OPC_PUSH_r32 (0x50)
289#define OPC_PUSH_Iv (0x68)
290#define OPC_PUSH_Ib (0x6a)
291#define OPC_RET (0xc3)
292#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
293#define OPC_SHIFT_1 (0xd1)
294#define OPC_SHIFT_Ib (0xc1)
295#define OPC_SHIFT_cl (0xd3)
296#define OPC_TESTL (0x85)
297#define OPC_XCHG_ax_r32 (0x90)
298
299#define OPC_GRP3_Ev (0xf7)
300#define OPC_GRP5 (0xff)
301
302/* Group 1 opcode extensions for 0x80-0x83.
303 These are also used as modifiers for OPC_ARITH. */
304#define ARITH_ADD 0
305#define ARITH_OR 1
306#define ARITH_ADC 2
307#define ARITH_SBB 3
308#define ARITH_AND 4
309#define ARITH_SUB 5
310#define ARITH_XOR 6
311#define ARITH_CMP 7
312
313/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
314#define SHIFT_ROL 0
315#define SHIFT_ROR 1
316#define SHIFT_SHL 4
317#define SHIFT_SHR 5
318#define SHIFT_SAR 7
319
320/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
321#define EXT3_NOT 2
322#define EXT3_NEG 3
323#define EXT3_MUL 4
324#define EXT3_IMUL 5
325#define EXT3_DIV 6
326#define EXT3_IDIV 7
327
328/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
329#define EXT5_INC_Ev 0
330#define EXT5_DEC_Ev 1
331#define EXT5_CALLN_Ev 2
332#define EXT5_JMPN_Ev 4
333
334/* Condition codes to be added to OPC_JCC_{long,short}. */
335#define JCC_JMP (-1)
336#define JCC_JO 0x0
337#define JCC_JNO 0x1
338#define JCC_JB 0x2
339#define JCC_JAE 0x3
340#define JCC_JE 0x4
341#define JCC_JNE 0x5
342#define JCC_JBE 0x6
343#define JCC_JA 0x7
344#define JCC_JS 0x8
345#define JCC_JNS 0x9
346#define JCC_JP 0xa
347#define JCC_JNP 0xb
348#define JCC_JL 0xc
349#define JCC_JGE 0xd
350#define JCC_JLE 0xe
351#define JCC_JG 0xf
352
353static const uint8_t tcg_cond_to_jcc[10] = {
354 [TCG_COND_EQ] = JCC_JE,
355 [TCG_COND_NE] = JCC_JNE,
356 [TCG_COND_LT] = JCC_JL,
357 [TCG_COND_GE] = JCC_JGE,
358 [TCG_COND_LE] = JCC_JLE,
359 [TCG_COND_GT] = JCC_JG,
360 [TCG_COND_LTU] = JCC_JB,
361 [TCG_COND_GEU] = JCC_JAE,
362 [TCG_COND_LEU] = JCC_JBE,
363 [TCG_COND_GTU] = JCC_JA,
364};
365
366#if TCG_TARGET_REG_BITS == 64
367static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
368{
369 int rex;
370
371 if (opc & P_DATA16) {
372 /* We should never be asking for both 16 and 64-bit operation. */
373 assert((opc & P_REXW) == 0);
374 tcg_out8(s, 0x66);
375 }
376 if (opc & P_ADDR32) {
377 tcg_out8(s, 0x67);
378 }
379
380 rex = 0;
381 rex |= (opc & P_REXW) >> 8; /* REX.W */
382 rex |= (r & 8) >> 1; /* REX.R */
383 rex |= (x & 8) >> 2; /* REX.X */
384 rex |= (rm & 8) >> 3; /* REX.B */
385
386 /* P_REXB_{R,RM} indicates that the given register is the low byte.
387 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
388 as otherwise the encoding indicates %[abcd]h. Note that the values
389 that are ORed in merely indicate that the REX byte must be present;
390 those bits get discarded in output. */
391 rex |= opc & (r >= 4 ? P_REXB_R : 0);
392 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
393
394 if (rex) {
395 tcg_out8(s, (uint8_t)(rex | 0x40));
396 }
397
398 if (opc & P_EXT) {
399 tcg_out8(s, 0x0f);
400 }
401 tcg_out8(s, opc);
402}
403#else
404static void tcg_out_opc(TCGContext *s, int opc)
405{
406 if (opc & P_DATA16) {
407 tcg_out8(s, 0x66);
408 }
409 if (opc & P_EXT) {
410 tcg_out8(s, 0x0f);
411 }
412 tcg_out8(s, opc);
413}
414/* Discard the register arguments to tcg_out_opc early, so as not to penalize
415 the 32-bit compilation paths. This method works with all versions of gcc,
416 whereas relying on the optimizer to eliminate them may not always work. */
417#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
418#endif
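/* Worked example of the REX computation above: a 64-bit register add
   such as tcg_out_modrm(s, OPC_ADD_GvEv + P_REXW, TCG_REG_RAX,
   TCG_REG_R8) yields rex = 8 (REX.W, from P_REXW >> 8) | 1 (REX.B,
   since rm = 8 has bit 3 set), so 0x40 | 9 = 0x49 is emitted,
   followed by opcode 0x03 and ModRM 0xc0 -- the three bytes
   49 03 c0, i.e. "addq %r8, %rax". */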
419
420static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
421{
422 tcg_out_opc(s, opc, r, rm, 0);
423 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
424}
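/* The ModRM byte built here packs mod=11 (register direct) with the
   REG and R/M fields.  E.g. tcg_out_modrm(s, OPC_MOVL_GvEv,
   TCG_REG_EAX, TCG_REG_ECX) emits 8b c1, i.e. "movl %ecx, %eax"
   (0xc0 | (0 << 3) | 1 = 0xc1). */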
425
426/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
427 Either RM or INDEX may be omitted by passing a negative value. In 64-bit
428 mode for absolute addresses, ~RM is the size of the immediate operand
429 that will follow the instruction. */
430
431static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
432 int index, int shift,
433 tcg_target_long offset)
434{
435 int mod, len;
436
437 if (index < 0 && rm < 0) {
438 if (TCG_TARGET_REG_BITS == 64) {
439 /* Try for a rip-relative addressing mode. This has replaced
440 the 32-bit-mode absolute addressing encoding. */
441 tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
442 tcg_target_long disp = offset - pc;
443 if (disp == (int32_t)disp) {
444 tcg_out_opc(s, opc, r, 0, 0);
445 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
446 tcg_out32(s, disp);
447 return;
448 }
449
450 /* Try for an absolute address encoding. This requires the
451 use of the MODRM+SIB encoding and is therefore larger than
452 rip-relative addressing. */
453 if (offset == (int32_t)offset) {
454 tcg_out_opc(s, opc, r, 0, 0);
455 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
456 tcg_out8(s, (4 << 3) | 5);
457 tcg_out32(s, offset);
458 return;
459 }
460
461 /* ??? The memory isn't directly addressable. */
462 tcg_abort();
463 } else {
464 /* Absolute address. */
465 tcg_out_opc(s, opc, r, 0, 0);
466 tcg_out8(s, (r << 3) | 5);
467 tcg_out32(s, offset);
468 return;
469 }
470 }
471
472 /* Find the length of the immediate addend. Note that the encoding
473 that would be used for (%ebp) indicates absolute addressing. */
474 if (rm < 0) {
475 mod = 0, len = 4, rm = 5;
476 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
477 mod = 0, len = 0;
478 } else if (offset == (int8_t)offset) {
479 mod = 0x40, len = 1;
480 } else {
481 mod = 0x80, len = 4;
482 }
483
484 /* Use a single byte MODRM format if possible. Note that the encoding
485 that would be used for %esp is the escape to the two byte form. */
486 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
487 /* Single byte MODRM format. */
488 tcg_out_opc(s, opc, r, rm, 0);
489 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
490 } else {
491 /* Two byte MODRM+SIB format. */
492
493 /* Note that the encoding that would place %esp into the index
494 field indicates no index register. In 64-bit mode, the REX.X
495 bit counts, so %r12 can be used as the index. */
496 if (index < 0) {
497 index = 4;
498 } else {
499 assert(index != TCG_REG_ESP);
500 }
501
502 tcg_out_opc(s, opc, r, rm, index);
503 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
504 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
505 }
506
507 if (len == 1) {
508 tcg_out8(s, offset);
509 } else if (len == 4) {
510 tcg_out32(s, offset);
511 }
512}
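/* Worked example: a dword load through base + index*scale + disp such
   as tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_EBX, TCG_REG_ESI, 2, 8) takes the two-byte form: ModRM 0x44
   (mod=0x40 for disp8, rm=4 escapes to SIB), SIB 0xb3 (scale=2,
   index=%esi, base=%ebx), then the offset byte -- 8b 44 b3 08, i.e.
   "movl 8(%ebx,%esi,4), %eax". */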
513
514/* A simplification of the above with no index or shift. */
515static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
516 int rm, tcg_target_long offset)
517{
518 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
519}
520
521/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
522static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
523{
524 /* Propagate an opcode prefix, such as P_REXW. */
525 int ext = subop & ~0x7;
526 subop &= 0x7;
527
528 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
529}
530
531static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
532{
533 if (arg != ret) {
534 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
535 tcg_out_modrm(s, opc, ret, arg);
536 }
537}
538
539static void tcg_out_movi(TCGContext *s, TCGType type,
540 int ret, tcg_target_long arg)
541{
542 if (arg == 0) {
543 tgen_arithr(s, ARITH_XOR, ret, ret);
544 return;
545 } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
546 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
547 tcg_out32(s, arg);
548 } else if (arg == (int32_t)arg) {
549 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
550 tcg_out32(s, arg);
551 } else {
552 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
553 tcg_out32(s, arg);
554 tcg_out32(s, arg >> 31 >> 1);
555 }
556}
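/* The cases above, illustrated for register 0 on x86-64:
     movi I64, 0   -> 33 c0                   xorl  %eax, %eax
     movi I64, 1   -> b8 01 00 00 00          movl  $1, %eax
     movi I64, -1  -> 48 c7 c0 ff ff ff ff    movq  $-1, %rax
   and anything fitting neither 32-bit form falls through to the
   ten-byte movabs (REX.W + b8, imm64).  The xor is the shortest
   encoding and breaks the dependency on the register's old value;
   it deliberately skips REX.W, since a 32-bit write zeroes the high
   half anyway. */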
557
558static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
559{
560 if (val == (int8_t)val) {
561 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
562 tcg_out8(s, val);
563 } else if (val == (int32_t)val) {
564 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
565 tcg_out32(s, val);
566 } else {
567 tcg_abort();
568 }
569}
570
571static inline void tcg_out_push(TCGContext *s, int reg)
572{
573 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
574}
575
576static inline void tcg_out_pop(TCGContext *s, int reg)
577{
578 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
579}
580
581static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
582 int arg1, tcg_target_long arg2)
583{
584 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
585 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
586}
587
588static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
589 int arg1, tcg_target_long arg2)
590{
591 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
592 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
593}
594
595static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
596{
597 /* Propagate an opcode prefix, such as P_DATA16. */
598 int ext = subopc & ~0x7;
599 subopc &= 0x7;
600
601 if (count == 1) {
602 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
603 } else {
604 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
605 tcg_out8(s, count);
606 }
607}
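/* E.g. tcg_out_shifti(s, SHIFT_SHL, TCG_REG_EAX, 3) emits c1 e0 03
   ("shll $3, %eax"; ModRM 0xe0 = 0xc0 | SHIFT_SHL << 3), while a
   count of 1 uses the shorter d1 e0 form.  tcg_out_rolw_8 below
   relies on the P_DATA16 propagation to get the 16-bit rotate
   66 c1 c0 08. */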
608
609static inline void tcg_out_bswap32(TCGContext *s, int reg)
610{
611 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
612}
613
614static inline void tcg_out_rolw_8(TCGContext *s, int reg)
615{
616 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
617}
618
619static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
620{
621 /* movzbl */
622 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
623 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
624}
625
626static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
627{
628 /* movsbl */
629 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
630 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
631}
632
633static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
634{
635 /* movzwl */
636 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
637}
638
639static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
640{
641 /* movsw[lq] */
642 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
643}
644
645static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
646{
647 /* 32-bit mov zero extends. */
648 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
649}
650
651static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
652{
653 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
654}
655
656static inline void tcg_out_bswap64(TCGContext *s, int reg)
657{
658 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
659}
660
661static void tgen_arithi(TCGContext *s, int c, int r0,
662 tcg_target_long val, int cf)
663{
664 int rexw = 0;
665
666 if (TCG_TARGET_REG_BITS == 64) {
667 rexw = c & -8;
668 c &= 7;
669 }
670
671 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
672 partial flags update stalls on Pentium4 and are not recommended
673 by current Intel optimization manuals. */
674 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
675 int is_inc = (c == ARITH_ADD) ^ (val < 0);
676 if (TCG_TARGET_REG_BITS == 64) {
677 /* The single-byte increment encodings are re-tasked as the
678 REX prefixes. Use the MODRM encoding. */
679 tcg_out_modrm(s, OPC_GRP5 + rexw,
680 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
681 } else {
682 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
683 }
684 return;
685 }
686
687 if (c == ARITH_AND) {
688 if (TCG_TARGET_REG_BITS == 64) {
689 if (val == 0xffffffffu) {
690 tcg_out_ext32u(s, r0, r0);
691 return;
692 }
693 if (val == (uint32_t)val) {
694 /* AND with no high bits set can use a 32-bit operation. */
695 rexw = 0;
696 }
697 }
698 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
699 tcg_out_ext8u(s, r0, r0);
700 return;
701 }
702 if (val == 0xffffu) {
703 tcg_out_ext16u(s, r0, r0);
704 return;
705 }
706 }
707
708 if (val == (int8_t)val) {
709 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
710 tcg_out8(s, val);
711 return;
712 }
713 if (rexw == 0 || val == (int32_t)val) {
714 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
715 tcg_out32(s, val);
716 return;
717 }
718
719 tcg_abort();
720}
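/* Worked example: tgen_arithi(s, ARITH_ADD, TCG_REG_EBX, 8, 0) fits
   the sign-extended-imm8 form 83 c3 08 ("addl $8, %ebx"), whereas a
   value such as 1000 needs the imm32 form 81 c3 e8 03 00 00.  A val
   of +/-1 without the carry-flag constraint is turned into inc/dec
   above, and AND with masks 0xff/0xffff/0xffffffff is rewritten as
   the matching zero-extension move where legal. */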
721
722static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
723{
724 if (val != 0) {
725 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
726 }
727}
728
729#ifdef VBOX
730static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val)
731{
732 if (val != 0) {
733 tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0);
734 }
735}
736#endif
737
738/* Use SMALL != 0 to force a short forward branch. */
739static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
740{
741 int32_t val, val1;
742 TCGLabel *l = &s->labels[label_index];
743
744 if (l->has_value) {
745 val = l->u.value - (tcg_target_long)s->code_ptr;
746 val1 = val - 2;
747 if ((int8_t)val1 == val1) {
748 if (opc == -1) {
749 tcg_out8(s, OPC_JMP_short);
750 } else {
751 tcg_out8(s, OPC_JCC_short + opc);
752 }
753 tcg_out8(s, val1);
754 } else {
755 if (small) {
756 tcg_abort();
757 }
758 if (opc == -1) {
759 tcg_out8(s, OPC_JMP_long);
760 tcg_out32(s, val - 5);
761 } else {
762 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
763 tcg_out32(s, val - 6);
764 }
765 }
766 } else if (small) {
767 if (opc == -1) {
768 tcg_out8(s, OPC_JMP_short);
769 } else {
770 tcg_out8(s, OPC_JCC_short + opc);
771 }
772 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
773 s->code_ptr += 1;
774 } else {
775 if (opc == -1) {
776 tcg_out8(s, OPC_JMP_long);
777 } else {
778 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
779 }
780 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
781 s->code_ptr += 4;
782 }
783}
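/* Illustration of the encodings chosen above: a "je" to a label that
   resolved 30 bytes before the current output position has val = -30
   and val1 = -32, which fits in a byte, so the two-byte short form
   74 e0 is emitted.  An unresolved (forward) label instead gets a
   placeholder displacement plus an R_386_PC8 or R_386_PC32
   relocation, which patch_reloc above fills in once tcg_out_label
   assigns the label an address. */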
784
785static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
786 int const_arg2, int rexw)
787{
788 if (const_arg2) {
789 if (arg2 == 0) {
790 /* test r, r */
791 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
792 } else {
793 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
794 }
795 } else {
796 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
797 }
798}
799
800static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
801 TCGArg arg1, TCGArg arg2, int const_arg2,
802 int label_index, int small)
803{
804 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
805 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
806}
807
808#if TCG_TARGET_REG_BITS == 64
809static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
810 TCGArg arg1, TCGArg arg2, int const_arg2,
811 int label_index, int small)
812{
813 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
814 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
815}
816#else
817/* XXX: we implement it at the target level to avoid having to
818 handle temporaries that cross basic blocks */
819static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
820 const int *const_args, int small)
821{
822 int label_next;
823 label_next = gen_new_label();
824 switch(args[4]) {
825 case TCG_COND_EQ:
826 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
827 label_next, 1);
828 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
829 args[5], small);
830 break;
831 case TCG_COND_NE:
832 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
833 args[5], small);
834 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
835 args[5], small);
836 break;
837 case TCG_COND_LT:
838 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
839 args[5], small);
840 tcg_out_jxx(s, JCC_JNE, label_next, 1);
841 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
842 args[5], small);
843 break;
844 case TCG_COND_LE:
845 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
846 args[5], small);
847 tcg_out_jxx(s, JCC_JNE, label_next, 1);
848 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
849 args[5], small);
850 break;
851 case TCG_COND_GT:
852 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
853 args[5], small);
854 tcg_out_jxx(s, JCC_JNE, label_next, 1);
855 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
856 args[5], small);
857 break;
858 case TCG_COND_GE:
859 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
860 args[5], small);
861 tcg_out_jxx(s, JCC_JNE, label_next, 1);
862 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
863 args[5], small);
864 break;
865 case TCG_COND_LTU:
866 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
867 args[5], small);
868 tcg_out_jxx(s, JCC_JNE, label_next, 1);
869 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
870 args[5], small);
871 break;
872 case TCG_COND_LEU:
873 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
874 args[5], small);
875 tcg_out_jxx(s, JCC_JNE, label_next, 1);
876 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
877 args[5], small);
878 break;
879 case TCG_COND_GTU:
880 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
881 args[5], small);
882 tcg_out_jxx(s, JCC_JNE, label_next, 1);
883 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
884 args[5], small);
885 break;
886 case TCG_COND_GEU:
887 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
888 args[5], small);
889 tcg_out_jxx(s, JCC_JNE, label_next, 1);
890 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
891 args[5], small);
892 break;
893 default:
894 tcg_abort();
895 }
896 tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
897}
898#endif
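/* Sketch of the double-word lowering above for the signed "<" case
   (arguments are (lo, hi) pairs):
       compare the high words;  jl  taken   -- high words decide when they differ
       jne next                             -- differ but not less: fall through
       compare the low words;   jb  taken   -- equal highs: low compare is unsigned
     next:
   The unsigned conditions take the same shape with jb/ja used in
   both comparisons. */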
899
900static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
901 TCGArg arg1, TCGArg arg2, int const_arg2)
902{
903 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
904 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
905 tcg_out_ext8u(s, dest, dest);
906}
907
908#if TCG_TARGET_REG_BITS == 64
909static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
910 TCGArg arg1, TCGArg arg2, int const_arg2)
911{
912 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
913 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
914 tcg_out_ext8u(s, dest, dest);
915}
916#else
917static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
918 const int *const_args)
919{
920 TCGArg new_args[6];
921 int label_true, label_over;
922
923 memcpy(new_args, args+1, 5*sizeof(TCGArg));
924
925 if (args[0] == args[1] || args[0] == args[2]
926 || (!const_args[3] && args[0] == args[3])
927 || (!const_args[4] && args[0] == args[4])) {
928 /* When the destination overlaps with one of the argument
929 registers, don't do anything tricky. */
930 label_true = gen_new_label();
931 label_over = gen_new_label();
932
933 new_args[5] = label_true;
934 tcg_out_brcond2(s, new_args, const_args+1, 1);
935
936 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
937 tcg_out_jxx(s, JCC_JMP, label_over, 1);
938 tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
939
940 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
941 tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
942 } else {
943 /* When the destination does not overlap one of the arguments,
944 clear the destination first, jump if cond false, and emit an
945 increment in the true case. This results in smaller code. */
946
947 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
948
949 label_over = gen_new_label();
950 new_args[4] = tcg_invert_cond(new_args[4]);
951 new_args[5] = label_over;
952 tcg_out_brcond2(s, new_args, const_args+1, 1);
953
954 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
955 tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
956 }
957}
958#endif
959
960static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
961{
962 tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
963
964 if (disp == (int32_t)disp) {
965 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
966 tcg_out32(s, disp);
967 } else {
968 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
969 tcg_out_modrm(s, OPC_GRP5,
970 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
971 }
972}
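/* E.g. a call whose destination is within +/-2GB of the output
   position is the five-byte e8 rel32; only on x86-64, when the
   displacement overflows 32 bits, does it fall back to the ten-byte
   movabs into %r10 (call-clobbered, and not an argument register)
   plus the indirect ff /2 call. */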
973
974static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
975{
976#ifdef VBOX
977 tcg_gen_stack_alignment_check(s);
978#endif
979 tcg_out_branch(s, 1, dest);
980}
981
982static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
983{
984 tcg_out_branch(s, 0, dest);
985}
986
987#if defined(CONFIG_SOFTMMU)
988
989#include "../../softmmu_defs.h"
990
991static void *qemu_ld_helpers[4] = {
992 __ldb_mmu,
993 __ldw_mmu,
994 __ldl_mmu,
995 __ldq_mmu,
996};
997
998static void *qemu_st_helpers[4] = {
999 __stb_mmu,
1000 __stw_mmu,
1001 __stl_mmu,
1002 __stq_mmu,
1003};
1004
1005/* Perform the TLB load and compare.
1006
1007 Inputs:
1008 ADDRLO_IDX contains the index into ARGS of the low part of the
1009 address; the high part of the address is at ADDRLO_IDX+1.
1010
1011 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1012
1013 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1014 This should be offsetof addr_read or addr_write.
1015
1016 Outputs:
1017 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1018 positions of the displacements of forward jumps to the TLB miss case.
1019
1020 First argument register is loaded with the low part of the address.
1021 In the TLB hit case, it has been adjusted as indicated by the TLB
1022 and so is a host address. In the TLB miss case, it continues to
1023 hold a guest address.
1024
1025 Second argument register is clobbered. */
1026
1027static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
1028 int mem_index, int s_bits,
1029 const TCGArg *args,
1030 uint8_t **label_ptr, int which)
1031{
1032 const int addrlo = args[addrlo_idx];
1033 const int r0 = tcg_target_call_iarg_regs[0];
1034 const int r1 = tcg_target_call_iarg_regs[1];
1035 TCGType type = TCG_TYPE_I32;
1036 int rexw = 0;
1037
1038 if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
1039 type = TCG_TYPE_I64;
1040 rexw = P_REXW;
1041 }
1042
1043 tcg_out_mov(s, type, r1, addrlo);
1044 tcg_out_mov(s, type, r0, addrlo);
1045
1046 tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
1047 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1048
1049 tgen_arithi(s, ARITH_AND + rexw, r0,
1050 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1051 tgen_arithi(s, ARITH_AND + rexw, r1,
1052 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1053
1054 tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
1055 offsetof(CPUState, tlb_table[mem_index][0])
1056 + which);
1057
1058 /* cmp 0(r1), r0 */
1059 tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
1060
1061 tcg_out_mov(s, type, r0, addrlo);
1062
1063 /* jne label1 */
1064 tcg_out8(s, OPC_JCC_short + JCC_JNE);
1065 label_ptr[0] = s->code_ptr;
1066 s->code_ptr++;
1067
1068 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1069 /* cmp 4(r1), addrhi */
1070 tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
1071
1072 /* jne label1 */
1073 tcg_out8(s, OPC_JCC_short + JCC_JNE);
1074 label_ptr[1] = s->code_ptr;
1075 s->code_ptr++;
1076 }
1077
1078 /* TLB Hit. */
1079
1080 /* add addend(r1), r0 */
1081 tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
1082 offsetof(CPUTLBEntry, addend) - which);
1083}
1084#endif
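/* Numeric sketch of the lookup above, assuming typical values
   TARGET_PAGE_BITS = 12, CPU_TLB_SIZE = 256 and a 16-byte CPUTLBEntry
   (CPU_TLB_ENTRY_BITS = 4): for a guest address A and access size
   "size" = 1 << s_bits,
     r1 = (A >> 8) & 0xff0              -- TLB slot index, scaled to a byte offset
     r0 = A & (0xfffff000 | (size - 1)) -- page tag plus alignment bits
   The LEA then turns r1 into env + tlb_table[mem_index] + r1, and the
   cmp against addr_read/addr_write both checks the page tag and sends
   unaligned accesses (nonzero low bits in r0) to the slow path. */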
1085
1086static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
1087 int base, tcg_target_long ofs, int sizeop)
1088{
1089#ifdef TARGET_WORDS_BIGENDIAN
1090 const int bswap = 1;
1091#else
1092 const int bswap = 0;
1093#endif
1094 switch (sizeop) {
1095 case 0:
1096 tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
1097 break;
1098 case 0 | 4:
1099 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
1100 break;
1101 case 1:
1102 tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
1103 if (bswap) {
1104 tcg_out_rolw_8(s, datalo);
1105 }
1106 break;
1107 case 1 | 4:
1108 if (bswap) {
1109 tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
1110 tcg_out_rolw_8(s, datalo);
1111 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1112 } else {
1113 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
1114 }
1115 break;
1116 case 2:
1117 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1118 if (bswap) {
1119 tcg_out_bswap32(s, datalo);
1120 }
1121 break;
1122#if TCG_TARGET_REG_BITS == 64
1123 case 2 | 4:
1124 if (bswap) {
1125 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1126 tcg_out_bswap32(s, datalo);
1127 tcg_out_ext32s(s, datalo, datalo);
1128 } else {
1129 tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
1130 }
1131 break;
1132#endif
1133 case 3:
1134 if (TCG_TARGET_REG_BITS == 64) {
1135 tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
1136 if (bswap) {
1137 tcg_out_bswap64(s, datalo);
1138 }
1139 } else {
1140 if (bswap) {
1141 int t = datalo;
1142 datalo = datahi;
1143 datahi = t;
1144 }
1145 if (base != datalo) {
1146 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1147 tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1148 } else {
1149 tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1150 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1151 }
1152 if (bswap) {
1153 tcg_out_bswap32(s, datalo);
1154 tcg_out_bswap32(s, datahi);
1155 }
1156 }
1157 break;
1158 default:
1159 tcg_abort();
1160 }
1161}
1162
1163#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
1164
1165static void * const vbox_ld_helpers[] = {
1166 __ldub_vbox_phys,
1167 __lduw_vbox_phys,
1168 __ldul_vbox_phys,
1169 __ldq_vbox_phys,
1170 __ldb_vbox_phys,
1171 __ldw_vbox_phys,
1172 __ldl_vbox_phys,
1173 __ldq_vbox_phys,
1174};
1175
1176static void * const vbox_st_helpers[] = {
1177 __stb_vbox_phys,
1178 __stw_vbox_phys,
1179 __stl_vbox_phys,
1180 __stq_vbox_phys
1181};
1182
1183DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst)
1184{
1185 intptr_t disp;
1186# ifdef VBOX
1187 tcg_gen_stack_alignment_check(s);
1188# endif
1189 disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5;
1190 tcg_out8(s, 0xe8); /* call disp32 */
1191 tcg_out32(s, disp); /* disp32 */
1192}
1193
1194static void tcg_out_vbox_phys_read(TCGContext *s, int index,
1195 int addr_reg,
1196 int data_reg, int data_reg2)
1197{
1198 int useReg2 = ((index & 3) == 3);
1199
1200 /** @todo: should we make phys address accessors fastcalls - probably not a big deal */
1201 /* out parameter (address), note that phys address is always 64-bit */
1202 AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
1203
1204# if 0
1205 tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */
1206 tcg_out_push(s, addr_reg);
1207# else
1208 /* mov addr_reg, %eax */
1209 tcg_out_mov(s, TCG_REG_EAX, addr_reg);
1210# endif
1211
1212 tcg_out_long_call(s, vbox_ld_helpers[index]);
1213
1214 /* mov %eax, data_reg */
1215 tcg_out_mov(s, data_reg, TCG_REG_EAX);
1216
1217 /* returned 64-bit value */
1218 if (useReg2)
1219 tcg_out_mov(s, data_reg2, TCG_REG_EDX);
1220}
1221
1222static void tcg_out_vbox_phys_write(TCGContext *s, int index,
1223 int addr_reg,
1224 int val_reg, int val_reg2) {
1225 int useReg2 = ((index & 3) == 3);
1226
1227# if 0
1228 /* out parameter (value2) */
1229 if (useReg2)
1230 tcg_out_push(s, val_reg2);
1231 /* out parameter (value) */
1232 tcg_out_push(s, val_reg);
1233 /* out parameter (address), note that phys address is always 64-bit */
1234 AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
1235 tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */
1236 tcg_out_push(s, addr_reg);
1237# else
1238 Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX)));
1239 /* mov addr_reg, %eax */
1240 tcg_out_mov(s, TCG_REG_EAX, addr_reg);
1241 Assert(!useReg2 || (val_reg2 != TCG_REG_EDX));
1242 /* mov val_reg, %edx */
1243 tcg_out_mov(s, TCG_REG_EDX, val_reg);
1244 if (useReg2)
1245 tcg_out_mov(s, TCG_REG_ECX, val_reg2);
1246
1247# endif
1248 /* call it */
1249 tcg_out_long_call(s, vbox_st_helpers[index]);
1250
1251 /* clean stack after us */
1252# if 0
1253 tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4));
1254# endif
1255}
1256
1257#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */
1258
1259/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1260 EAX. It will be useful once fixed-register globals are less
1261 common. */
1262static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
1263 int opc)
1264{
1265 int data_reg, data_reg2 = 0;
1266 int addrlo_idx;
1267#if defined(CONFIG_SOFTMMU)
1268 int mem_index, s_bits, arg_idx;
1269 uint8_t *label_ptr[3];
1270#endif
1271
1272 data_reg = args[0];
1273 addrlo_idx = 1;
1274 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1275 data_reg2 = args[1];
1276 addrlo_idx = 2;
1277 }
1278
1279#if defined(CONFIG_SOFTMMU)
1280 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1281 s_bits = opc & 3;
1282
1283 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1284 label_ptr, offsetof(CPUTLBEntry, addr_read));
1285
1286 /* TLB Hit. */
1287 tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
1288 tcg_target_call_iarg_regs[0], 0, opc);
1289
1290 /* jmp label2 */
1291 tcg_out8(s, OPC_JMP_short);
1292 label_ptr[2] = s->code_ptr;
1293 s->code_ptr++;
1294
1295 /* TLB Miss. */
1296
1297 /* label1: */
1298 *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1299 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1300 *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1301 }
1302
1303 /* XXX: move that code at the end of the TB */
1304 /* The first argument is already loaded with addrlo. */
1305 arg_idx = 1;
1306 if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
1307 tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
1308 args[addrlo_idx + 1]);
1309 }
1310 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
1311 mem_index);
1312 tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
1313
1314 switch(opc) {
1315 case 0 | 4:
1316 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1317 break;
1318 case 1 | 4:
1319 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1320 break;
1321 case 0:
1322 tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
1323 break;
1324 case 1:
1325 tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
1326 break;
1327 case 2:
1328 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1329 break;
1330#if TCG_TARGET_REG_BITS == 64
1331 case 2 | 4:
1332 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1333 break;
1334#endif
1335 case 3:
1336 if (TCG_TARGET_REG_BITS == 64) {
1337 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1338 } else if (data_reg == TCG_REG_EDX) {
1339 /* xchg %edx, %eax */
1340 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1341 tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
1342 } else {
1343 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1344 tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
1345 }
1346 break;
1347 default:
1348 tcg_abort();
1349 }
1350
1351 /* label2: */
1352 *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1353#else
1354 {
1355 int32_t offset = GUEST_BASE;
1356 int base = args[addrlo_idx];
1357
1358 if (TCG_TARGET_REG_BITS == 64) {
1359 /* ??? We assume all operations have left us with register
1360 contents that are zero extended. So far this appears to
1361 be true. If we want to enforce this, we can either do
1362 an explicit zero-extension here, or (if GUEST_BASE == 0)
1363 use the ADDR32 prefix. For now, do nothing. */
1364
1365 if (offset != GUEST_BASE) {
1366 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1367 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1368 base = TCG_REG_RDI, offset = 0;
1369 }
1370 }
1371
1372 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
1373 }
1374#endif
1375}
1376
1377static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
1378 int base, tcg_target_long ofs, int sizeop)
1379{
1380#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
1381#ifdef TARGET_WORDS_BIGENDIAN
1382 const int bswap = 1;
1383#else
1384 const int bswap = 0;
1385#endif
1386 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1387 we could perform the bswap twice to restore the original value
1388 instead of moving to the scratch. But as it is, the L constraint
1389 means that the second argument reg is definitely free here. */
1390 int scratch = tcg_target_call_iarg_regs[1];
1391
1392 switch (sizeop) {
1393 case 0:
1394 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
1395 break;
1396 case 1:
1397 if (bswap) {
1398 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1399 tcg_out_rolw_8(s, scratch);
1400 datalo = scratch;
1401 }
1402 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
1403 break;
1404 case 2:
1405 if (bswap) {
1406 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1407 tcg_out_bswap32(s, scratch);
1408 datalo = scratch;
1409 }
1410 tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
1411 break;
1412 case 3:
1413 if (TCG_TARGET_REG_BITS == 64) {
1414 if (bswap) {
1415 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1416 tcg_out_bswap64(s, scratch);
1417 datalo = scratch;
1418 }
1419 tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
1420 } else if (bswap) {
1421 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1422 tcg_out_bswap32(s, scratch);
1423 tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
1424 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1425 tcg_out_bswap32(s, scratch);
1426 tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
1427 } else {
1428 tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
1429 tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1430 }
1431 break;
1432 default:
1433 tcg_abort();
1434 }
1435#else /* VBOX */
1436# error "broken"
1437 tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2);
1438#endif
1439}
1440
1441static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
1442 int opc)
1443{
1444 int data_reg, data_reg2 = 0;
1445 int addrlo_idx;
1446#if defined(CONFIG_SOFTMMU)
1447 int mem_index, s_bits;
1448 int stack_adjust;
1449 uint8_t *label_ptr[3];
1450#endif
1451
1452 data_reg = args[0];
1453 addrlo_idx = 1;
1454 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1455 data_reg2 = args[1];
1456 addrlo_idx = 2;
1457 }
1458
1459#if defined(CONFIG_SOFTMMU)
1460 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1461 s_bits = opc;
1462
1463 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1464 label_ptr, offsetof(CPUTLBEntry, addr_write));
1465
1466 /* TLB Hit. */
1467 tcg_out_qemu_st_direct(s, data_reg, data_reg2,
1468 tcg_target_call_iarg_regs[0], 0, opc);
1469
1470 /* jmp label2 */
1471 tcg_out8(s, OPC_JMP_short);
1472 label_ptr[2] = s->code_ptr;
1473 s->code_ptr++;
1474
1475 /* TLB Miss. */
1476
1477 /* label1: */
1478 *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1479 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1480 *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1481 }
1482
1483#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
1484
1485 /* XXX: move that code at the end of the TB */
1486 if (TCG_TARGET_REG_BITS == 64) {
1487 tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1488 TCG_REG_RSI, data_reg);
1489 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
1490 stack_adjust = 0;
1491 } else if (TARGET_LONG_BITS == 32) {
1492 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
1493 if (opc == 3) {
1494 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
1495 tcg_out_pushi(s, mem_index);
1496 stack_adjust = 4;
1497 } else {
1498 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
1499 stack_adjust = 0;
1500 }
1501 } else {
1502 if (opc == 3) {
1503 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
1504 tcg_out_pushi(s, mem_index);
1505 tcg_out_push(s, data_reg2);
1506 tcg_out_push(s, data_reg);
1507 stack_adjust = 12;
1508 } else {
1509 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
1510 switch(opc) {
1511 case 0:
1512 tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
1513 break;
1514 case 1:
1515 tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
1516 break;
1517 case 2:
1518 tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
1519 break;
1520 }
1521 tcg_out_pushi(s, mem_index);
1522 stack_adjust = 4;
1523 }
1524 }
1525
1526 tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
1527
1528 if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
1529 /* Pop and discard. This is 2 bytes smaller than the add. */
1530 tcg_out_pop(s, TCG_REG_ECX);
1531 } else if (stack_adjust != 0) {
1532 tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
1533 }
1534
1535#else /* VBOX && REM_PHYS_ADDR_IN_TLB */
1536# error Borked
1537 tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2);
1538#endif /* VBOX && REM_PHYS_ADDR_IN_TLB */
1539
1540 /* label2: */
1541 *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1542#else
1543 {
1544 int32_t offset = GUEST_BASE;
1545 int base = args[addrlo_idx];
1546
1547 if (TCG_TARGET_REG_BITS == 64) {
1548 /* ??? We assume all operations have left us with register
1549 contents that are zero extended. So far this appears to
1550 be true. If we want to enforce this, we can either do
1551 an explicit zero-extension here, or (if GUEST_BASE == 0)
1552 use the ADDR32 prefix. For now, do nothing. */
1553
1554 if (offset != GUEST_BASE) {
1555 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1556 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1557 base = TCG_REG_RDI, offset = 0;
1558 }
1559 }
1560
1561 tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
1562 }
1563#endif
1564}
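/* Aside on the stack_adjust handling above: when exactly one
   register-sized value was pushed, "pop %ecx" (one byte, 59) discards
   it, versus the three-byte "addl $4, %esp" (83 c4 04) -- the two-byte
   saving the comment mentions. */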
1565
1566static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1567 const TCGArg *args, const int *const_args)
1568{
1569 int c, rexw = 0;
1570
1571#if TCG_TARGET_REG_BITS == 64
1572# define OP_32_64(x) \
1573 case glue(glue(INDEX_op_, x), _i64): \
1574 rexw = P_REXW; /* FALLTHRU */ \
1575 case glue(glue(INDEX_op_, x), _i32)
1576#else
1577# define OP_32_64(x) \
1578 case glue(glue(INDEX_op_, x), _i32)
1579#endif
1580
1581 switch(opc) {
1582 case INDEX_op_exit_tb:
1583 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1584 tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
1585 break;
1586 case INDEX_op_goto_tb:
1587 if (s->tb_jmp_offset) {
1588 /* direct jump method */
1589 tcg_out8(s, OPC_JMP_long); /* jmp im */
1590 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1591 tcg_out32(s, 0);
1592 } else {
1593 /* indirect jump method */
1594 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1595 (tcg_target_long)(s->tb_next + args[0]));
1596 }
1597 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1598 break;
1599 case INDEX_op_call:
1600 if (const_args[0]) {
1601 tcg_out_calli(s, args[0]);
1602 } else {
1603 /* call *reg */
1604 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1605 }
1606 break;
1607 case INDEX_op_jmp:
1608 if (const_args[0]) {
1609 tcg_out_jmp(s, args[0]);
1610 } else {
1611 /* jmp *reg */
1612 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
1613 }
1614 break;
1615 case INDEX_op_br:
1616 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1617 break;
1618 case INDEX_op_movi_i32:
1619 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1620 break;
1621 OP_32_64(ld8u):
1622 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1623 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1624 break;
1625 OP_32_64(ld8s):
1626 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1627 break;
1628 OP_32_64(ld16u):
1629 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1630 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1631 break;
1632 OP_32_64(ld16s):
1633 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1634 break;
1635#if TCG_TARGET_REG_BITS == 64
1636 case INDEX_op_ld32u_i64:
1637#endif
1638 case INDEX_op_ld_i32:
1639 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1640 break;
1641
1642 OP_32_64(st8):
1643 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1644 args[0], args[1], args[2]);
1645 break;
1646 OP_32_64(st16):
1647 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1648 args[0], args[1], args[2]);
1649 break;
1650#if TCG_TARGET_REG_BITS == 64
1651 case INDEX_op_st32_i64:
1652#endif
1653 case INDEX_op_st_i32:
1654 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1655 break;
1656
1657 OP_32_64(add):
1658 /* For 3-operand addition, use LEA. */
1659 if (args[0] != args[1]) {
1660 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1661
1662 if (const_args[2]) {
1663 c3 = a2, a2 = -1;
1664 } else if (a0 == a2) {
1665 /* Watch out for dest = src + dest, since we've removed
1666 the matching constraint on the add. */
1667 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1668 break;
1669 }
1670
1671 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1672 break;
1673 }
1674 c = ARITH_ADD;
1675 goto gen_arith;
1676 OP_32_64(sub):
1677 c = ARITH_SUB;
1678 goto gen_arith;
1679 OP_32_64(and):
1680 c = ARITH_AND;
1681 goto gen_arith;
1682 OP_32_64(or):
1683 c = ARITH_OR;
1684 goto gen_arith;
1685 OP_32_64(xor):
1686 c = ARITH_XOR;
1687 goto gen_arith;
1688 gen_arith:
1689 if (const_args[2]) {
1690 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1691 } else {
1692 tgen_arithr(s, c + rexw, args[0], args[2]);
1693 }
1694 break;
1695
1696 OP_32_64(mul):
1697 if (const_args[2]) {
1698 int32_t val;
1699 val = args[2];
1700 if (val == (int8_t)val) {
1701 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1702 tcg_out8(s, val);
1703 } else {
1704 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1705 tcg_out32(s, val);
1706 }
1707 } else {
1708 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1709 }
1710 break;
1711
1712 OP_32_64(div2):
1713 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1714 break;
1715 OP_32_64(divu2):
1716 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1717 break;
1718
1719 OP_32_64(shl):
1720 c = SHIFT_SHL;
1721 goto gen_shift;
1722 OP_32_64(shr):
1723 c = SHIFT_SHR;
1724 goto gen_shift;
1725 OP_32_64(sar):
1726 c = SHIFT_SAR;
1727 goto gen_shift;
1728 OP_32_64(rotl):
1729 c = SHIFT_ROL;
1730 goto gen_shift;
1731 OP_32_64(rotr):
1732 c = SHIFT_ROR;
1733 goto gen_shift;
1734 gen_shift:
1735 if (const_args[2]) {
1736 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1737 } else {
1738 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1739 }
1740 break;
1741
1742 case INDEX_op_brcond_i32:
1743 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1744 args[3], 0);
1745 break;
1746 case INDEX_op_setcond_i32:
1747 tcg_out_setcond32(s, args[3], args[0], args[1],
1748 args[2], const_args[2]);
1749 break;
1750
1751 OP_32_64(bswap16):
1752 tcg_out_rolw_8(s, args[0]);
1753 break;
1754 OP_32_64(bswap32):
1755 tcg_out_bswap32(s, args[0]);
1756 break;
1757
1758 OP_32_64(neg):
1759 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1760 break;
1761 OP_32_64(not):
1762 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1763 break;
1764
1765 OP_32_64(ext8s):
1766 tcg_out_ext8s(s, args[0], args[1], rexw);
1767 break;
1768 OP_32_64(ext16s):
1769 tcg_out_ext16s(s, args[0], args[1], rexw);
1770 break;
1771 OP_32_64(ext8u):
1772 tcg_out_ext8u(s, args[0], args[1]);
1773 break;
1774 OP_32_64(ext16u):
1775 tcg_out_ext16u(s, args[0], args[1]);
1776 break;
1777
1778 case INDEX_op_qemu_ld8u:
1779 tcg_out_qemu_ld(s, args, 0);
1780 break;
1781 case INDEX_op_qemu_ld8s:
1782 tcg_out_qemu_ld(s, args, 0 | 4);
1783 break;
1784 case INDEX_op_qemu_ld16u:
1785 tcg_out_qemu_ld(s, args, 1);
1786 break;
1787 case INDEX_op_qemu_ld16s:
1788 tcg_out_qemu_ld(s, args, 1 | 4);
1789 break;
1790#if TCG_TARGET_REG_BITS == 64
1791 case INDEX_op_qemu_ld32u:
1792#endif
1793 case INDEX_op_qemu_ld32:
1794 tcg_out_qemu_ld(s, args, 2);
1795 break;
1796 case INDEX_op_qemu_ld64:
1797 tcg_out_qemu_ld(s, args, 3);
1798 break;
1799
1800 case INDEX_op_qemu_st8:
1801 tcg_out_qemu_st(s, args, 0);
1802 break;
1803 case INDEX_op_qemu_st16:
1804 tcg_out_qemu_st(s, args, 1);
1805 break;
1806 case INDEX_op_qemu_st32:
1807 tcg_out_qemu_st(s, args, 2);
1808 break;
1809 case INDEX_op_qemu_st64:
1810 tcg_out_qemu_st(s, args, 3);
1811 break;
1812
1813#if TCG_TARGET_REG_BITS == 32
1814 case INDEX_op_brcond2_i32:
1815 tcg_out_brcond2(s, args, const_args, 0);
1816 break;
1817 case INDEX_op_setcond2_i32:
1818 tcg_out_setcond2(s, args, const_args);
1819 break;
1820 case INDEX_op_mulu2_i32:
1821 tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
1822 break;
1823 case INDEX_op_add2_i32:
1824 if (const_args[4]) {
1825 tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
1826 } else {
1827 tgen_arithr(s, ARITH_ADD, args[0], args[4]);
1828 }
1829 if (const_args[5]) {
1830 tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
1831 } else {
1832 tgen_arithr(s, ARITH_ADC, args[1], args[5]);
1833 }
1834 break;
1835 case INDEX_op_sub2_i32:
1836 if (const_args[4]) {
1837 tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
1838 } else {
1839 tgen_arithr(s, ARITH_SUB, args[0], args[4]);
1840 }
1841 if (const_args[5]) {
1842 tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
1843 } else {
1844 tgen_arithr(s, ARITH_SBB, args[1], args[5]);
1845 }
1846 break;
1847#else /* TCG_TARGET_REG_BITS == 64 */
1848 case INDEX_op_movi_i64:
1849 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1850 break;
1851 case INDEX_op_ld32s_i64:
1852 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1853 break;
1854 case INDEX_op_ld_i64:
1855 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1856 break;
1857 case INDEX_op_st_i64:
1858 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1859 break;
1860 case INDEX_op_qemu_ld32s:
1861 tcg_out_qemu_ld(s, args, 2 | 4);
1862 break;
1863
1864 case INDEX_op_brcond_i64:
1865 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1866 args[3], 0);
1867 break;
1868 case INDEX_op_setcond_i64:
1869 tcg_out_setcond64(s, args[3], args[0], args[1],
1870 args[2], const_args[2]);
1871 break;
1872
1873 case INDEX_op_bswap64_i64:
1874 tcg_out_bswap64(s, args[0]);
1875 break;
1876 case INDEX_op_ext32u_i64:
1877 tcg_out_ext32u(s, args[0], args[1]);
1878 break;
1879 case INDEX_op_ext32s_i64:
1880 tcg_out_ext32s(s, args[0], args[1]);
1881 break;
1882#endif
1883
1884 default:
1885 tcg_abort();
1886 }
1887
1888#undef OP_32_64
1889}
1890
1891static const TCGTargetOpDef x86_op_defs[] = {
1892 { INDEX_op_exit_tb, { } },
1893 { INDEX_op_goto_tb, { } },
1894 { INDEX_op_call, { "ri" } },
1895 { INDEX_op_jmp, { "ri" } },
1896 { INDEX_op_br, { } },
1897 { INDEX_op_mov_i32, { "r", "r" } },
1898 { INDEX_op_movi_i32, { "r" } },
1899 { INDEX_op_ld8u_i32, { "r", "r" } },
1900 { INDEX_op_ld8s_i32, { "r", "r" } },
1901 { INDEX_op_ld16u_i32, { "r", "r" } },
1902 { INDEX_op_ld16s_i32, { "r", "r" } },
1903 { INDEX_op_ld_i32, { "r", "r" } },
1904 { INDEX_op_st8_i32, { "q", "r" } },
1905 { INDEX_op_st16_i32, { "r", "r" } },
1906 { INDEX_op_st_i32, { "r", "r" } },
1907
1908 { INDEX_op_add_i32, { "r", "r", "ri" } },
1909 { INDEX_op_sub_i32, { "r", "0", "ri" } },
1910 { INDEX_op_mul_i32, { "r", "0", "ri" } },
1911 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
1912 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
1913 { INDEX_op_and_i32, { "r", "0", "ri" } },
1914 { INDEX_op_or_i32, { "r", "0", "ri" } },
1915 { INDEX_op_xor_i32, { "r", "0", "ri" } },
1916
1917 { INDEX_op_shl_i32, { "r", "0", "ci" } },
1918 { INDEX_op_shr_i32, { "r", "0", "ci" } },
1919 { INDEX_op_sar_i32, { "r", "0", "ci" } },
1920 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
1921 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
1922
1923 { INDEX_op_brcond_i32, { "r", "ri" } },
1924
1925 { INDEX_op_bswap16_i32, { "r", "0" } },
1926 { INDEX_op_bswap32_i32, { "r", "0" } },
1927
1928 { INDEX_op_neg_i32, { "r", "0" } },
1929
1930 { INDEX_op_not_i32, { "r", "0" } },
1931
1932 { INDEX_op_ext8s_i32, { "r", "q" } },
1933 { INDEX_op_ext16s_i32, { "r", "r" } },
1934 { INDEX_op_ext8u_i32, { "r", "q" } },
1935 { INDEX_op_ext16u_i32, { "r", "r" } },
1936
1937 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
1938
1939#if TCG_TARGET_REG_BITS == 32
1940 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
1941 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1942 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1943 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
1944 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
1945#else
1946 { INDEX_op_mov_i64, { "r", "r" } },
1947 { INDEX_op_movi_i64, { "r" } },
1948 { INDEX_op_ld8u_i64, { "r", "r" } },
1949 { INDEX_op_ld8s_i64, { "r", "r" } },
1950 { INDEX_op_ld16u_i64, { "r", "r" } },
1951 { INDEX_op_ld16s_i64, { "r", "r" } },
1952 { INDEX_op_ld32u_i64, { "r", "r" } },
1953 { INDEX_op_ld32s_i64, { "r", "r" } },
1954 { INDEX_op_ld_i64, { "r", "r" } },
1955 { INDEX_op_st8_i64, { "r", "r" } },
1956 { INDEX_op_st16_i64, { "r", "r" } },
1957 { INDEX_op_st32_i64, { "r", "r" } },
1958 { INDEX_op_st_i64, { "r", "r" } },
1959
1960 { INDEX_op_add_i64, { "r", "0", "re" } },
1961 { INDEX_op_mul_i64, { "r", "0", "re" } },
1962 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
1963 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
1964 { INDEX_op_sub_i64, { "r", "0", "re" } },
1965 { INDEX_op_and_i64, { "r", "0", "reZ" } },
1966 { INDEX_op_or_i64, { "r", "0", "re" } },
1967 { INDEX_op_xor_i64, { "r", "0", "re" } },
1968
1969 { INDEX_op_shl_i64, { "r", "0", "ci" } },
1970 { INDEX_op_shr_i64, { "r", "0", "ci" } },
1971 { INDEX_op_sar_i64, { "r", "0", "ci" } },
1972 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
1973 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
1974
1975 { INDEX_op_brcond_i64, { "r", "re" } },
1976 { INDEX_op_setcond_i64, { "r", "r", "re" } },
1977
1978 { INDEX_op_bswap16_i64, { "r", "0" } },
1979 { INDEX_op_bswap32_i64, { "r", "0" } },
1980 { INDEX_op_bswap64_i64, { "r", "0" } },
1981 { INDEX_op_neg_i64, { "r", "0" } },
1982 { INDEX_op_not_i64, { "r", "0" } },
1983
1984 { INDEX_op_ext8s_i64, { "r", "r" } },
1985 { INDEX_op_ext16s_i64, { "r", "r" } },
1986 { INDEX_op_ext32s_i64, { "r", "r" } },
1987 { INDEX_op_ext8u_i64, { "r", "r" } },
1988 { INDEX_op_ext16u_i64, { "r", "r" } },
1989 { INDEX_op_ext32u_i64, { "r", "r" } },
1990#endif
1991
1992#if TCG_TARGET_REG_BITS == 64
1993 { INDEX_op_qemu_ld8u, { "r", "L" } },
1994 { INDEX_op_qemu_ld8s, { "r", "L" } },
1995 { INDEX_op_qemu_ld16u, { "r", "L" } },
1996 { INDEX_op_qemu_ld16s, { "r", "L" } },
1997 { INDEX_op_qemu_ld32, { "r", "L" } },
1998 { INDEX_op_qemu_ld32u, { "r", "L" } },
1999 { INDEX_op_qemu_ld32s, { "r", "L" } },
2000 { INDEX_op_qemu_ld64, { "r", "L" } },
2001
2002 { INDEX_op_qemu_st8, { "L", "L" } },
2003 { INDEX_op_qemu_st16, { "L", "L" } },
2004 { INDEX_op_qemu_st32, { "L", "L" } },
2005 { INDEX_op_qemu_st64, { "L", "L" } },
2006#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2007 { INDEX_op_qemu_ld8u, { "r", "L" } },
2008 { INDEX_op_qemu_ld8s, { "r", "L" } },
2009 { INDEX_op_qemu_ld16u, { "r", "L" } },
2010 { INDEX_op_qemu_ld16s, { "r", "L" } },
2011 { INDEX_op_qemu_ld32, { "r", "L" } },
2012 { INDEX_op_qemu_ld64, { "r", "r", "L" } },
2013
2014 { INDEX_op_qemu_st8, { "cb", "L" } },
2015 { INDEX_op_qemu_st16, { "L", "L" } },
2016 { INDEX_op_qemu_st32, { "L", "L" } },
2017 { INDEX_op_qemu_st64, { "L", "L", "L" } },
2018#else
2019 { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
2020 { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
2021 { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
2022 { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
2023 { INDEX_op_qemu_ld32, { "r", "L", "L" } },
2024 { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
2025
2026 { INDEX_op_qemu_st8, { "cb", "L", "L" } },
2027 { INDEX_op_qemu_st16, { "L", "L", "L" } },
2028 { INDEX_op_qemu_st32, { "L", "L", "L" } },
2029 { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
2030#endif
2031 { -1 },
2032};
2033
2034static int tcg_target_callee_save_regs[] = {
2035#if TCG_TARGET_REG_BITS == 64
2036 TCG_REG_RBP,
2037 TCG_REG_RBX,
2038 TCG_REG_R12,
2039 TCG_REG_R13,
2040 /* TCG_REG_R14, */ /* Currently used for the global env. */
2041 TCG_REG_R15,
2042#else
2043# ifndef VBOX
2044 /* TCG_REG_EBP, */ /* Currently used for the global env. */
2045 TCG_REG_EBX,
2046 TCG_REG_ESI,
2047 TCG_REG_EDI,
2048# else
2049 TCG_REG_EBP,
2050 TCG_REG_EBX,
2051 /* TCG_REG_ESI, */ /* Currently used for the global env. */
2052 TCG_REG_EDI,
2053# endif
2054#endif
2055};
2056
2057/* Generate global QEMU prologue and epilogue code */
2058static void tcg_target_qemu_prologue(TCGContext *s)
2059{
2060 int i, frame_size, push_size, stack_addend;
2061
2062 /* TB prologue */
2063
2064 /* Save all callee saved registers. */
2065 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2066 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2067 }
2068# if defined(VBOX_STRICT) && defined(RT_ARCH_X86)
2069 tcg_out8(s, 0x31); /* xor ebp, ebp */
2070 tcg_out8(s, 0xed);
2071# endif
2072
2073 /* Reserve some stack space. */
2074 push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
2075 push_size *= TCG_TARGET_REG_BITS / 8;
2076
2077 frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
2078 frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
2079 ~(TCG_TARGET_STACK_ALIGN - 1);
2080 stack_addend = frame_size - push_size;
2081 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2082
2083 /* jmp *tb. */
2084 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
2085# ifdef VBOX
2086 tcg_gen_stack_alignment_check(s);
2087# endif
2088
2089 tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */
2090
2091 /* TB epilogue */
2092 tb_ret_addr = s->code_ptr;
2093
2094 tcg_out_addi(s, TCG_REG_ESP, stack_addend);
2095
2096 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2097 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2098 }
2099 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2100}
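/* Frame arithmetic sketch for the x86-64 case above, assuming
   TCG_STATIC_CALL_ARGS_SIZE = 128 and TCG_TARGET_STACK_ALIGN = 16:
   five callee-saved pushes plus the return address give
   push_size = 6 * 8 = 48; frame_size = 48 + 128 = 176, already
   16-byte aligned, so stack_addend = 128 and the prologue emits
   "addq $-128, %rsp" (tcg_out_addi with a negative value).  The
   epilogue must undo exactly stack_addend before the pops. */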
2101
2102static void tcg_target_init(TCGContext *s)
2103{
2104#if !defined(CONFIG_USER_ONLY)
2105 /* fail safe */
2106 if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
2107 tcg_abort();
2108#endif
2109
2110 if (TCG_TARGET_REG_BITS == 64) {
2111 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2112 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2113 } else {
2114 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2115 }
2116
2117 tcg_regset_clear(tcg_target_call_clobber_regs);
2118 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2119 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2120 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2121 if (TCG_TARGET_REG_BITS == 64) {
2122 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2123 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2124 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2125 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2126 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2127 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2128 }
2129
2130 tcg_regset_clear(s->reserved_regs);
2131 tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
2132
2133 tcg_add_target_add_op_defs(x86_op_defs);
2134}