VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatYasm.cpp@ 41527

Last change on this file since 41527 was 41505, checked in by vboxsync, 13 years ago

DISFormatYasmIsOddEncoding: OP_MOV reg,reg variants.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 59.0 KB
Line 
1/* $Id: DisasmFormatYasm.cpp 41505 2012-05-30 19:18:15Z vboxsync $ */
2/** @file
3 * VBox Disassembler - Yasm(/Nasm) Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include <VBox/dis.h>
23#include "DisasmInternal.h"
24#include <iprt/string.h>
25#include <iprt/assert.h>
26#include <iprt/ctype.h>
27
28
29/*******************************************************************************
30* Global Variables *
31*******************************************************************************/
/** Run of blanks used as the copy source when emitting column padding with
 *  PUT_STR(g_szSpaces, cch).  It must be at least as long as the largest
 *  padding request: the opcode-bytes column alone can ask for up to
 *  7 * 3 + 2 + 2 = 25 blanks, so anything shorter makes PUT_STR read past the
 *  end of the array.  80 blanks leaves headroom for other padding users. */
static const char g_szSpaces[] =
"                                                                                ";
/*
 * Register name tables, indexed by register number.
 *
 * Every row has a fixed width and any bytes after the name are zero (short
 * string initialisers are zero filled).  The lookup helpers rely on this to
 * work out the name length by probing the trailing bytes of a row.
 */

/** 8-bit general purpose register names. */
static const char g_aszYasmRegGen8[20][5] =
{
    "al",   "cl",   "dl",   "bl",
    "ah",   "ch",   "dh",   "bh",
    "r8b",  "r9b",  "r10b", "r11b",
    "r12b", "r13b", "r14b", "r15b",
    "spl",  "bpl",  "sil",  "dil"
};

/** 16-bit general purpose register names. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",
    "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w",
    "r12w", "r13w", "r14w", "r15w"
};

/** Register combinations used by 16-bit effective addresses.
 *  NOTE(review): presumably indexed by the 16-bit ModRM r/m value; the table is
 *  not referenced by the helpers next to it - confirm against its users. */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di",
    "si",    "di",    "bp",    "bx"
};

/** 32-bit general purpose register names. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",
    "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d",
    "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general purpose register names. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx",
    "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "r15"
};

/** Segment register names. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack register names. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3",
    "st4", "st5", "st6", "st7"
};

/** MMX register names. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3",
    "mm4", "mm5", "mm6", "mm7"
};

/** XMM register names. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",
    "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11",
    "xmm12", "xmm13", "xmm14", "xmm15"
};

/** Control register names. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",
    "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11",
    "cr12", "cr13", "cr14", "cr15"
};

/** Debug register names. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",
    "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11",
    "dr12", "dr13", "dr14", "dr15"
};

/** Test register names. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",
    "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11",
    "tr12", "tr13", "tr14", "tr15"
};
82
83
84
85/**
86 * Gets the base register name for the given parameter.
87 *
88 * @returns Pointer to the register name.
89 * @param pCpu The disassembler cpu state.
90 * @param pParam The parameter.
91 * @param pcchReg Where to store the length of the name.
92 */
93static const char *disasmFormatYasmBaseReg(PCDISCPUSTATE pCpu, PCOP_PARAMETER pParam, size_t *pcchReg)
94{
95 switch (pParam->flags & ( USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64
96 | USE_REG_FP | USE_REG_MMX | USE_REG_XMM | USE_REG_CR
97 | USE_REG_DBG | USE_REG_SEG | USE_REG_TEST))
98
99 {
100 case USE_REG_GEN8:
101 {
102 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8));
103 const char *psz = g_aszYasmRegGen8[pParam->base.reg_gen];
104 *pcchReg = 2 + !!psz[2] + !!psz[3];
105 return psz;
106 }
107
108 case USE_REG_GEN16:
109 {
110 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
111 const char *psz = g_aszYasmRegGen16[pParam->base.reg_gen];
112 *pcchReg = 2 + !!psz[2] + !!psz[3];
113 return psz;
114 }
115
116 case USE_REG_GEN32:
117 {
118 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
119 const char *psz = g_aszYasmRegGen32[pParam->base.reg_gen];
120 *pcchReg = 2 + !!psz[2] + !!psz[3];
121 return psz;
122 }
123
124 case USE_REG_GEN64:
125 {
126 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
127 const char *psz = g_aszYasmRegGen64[pParam->base.reg_gen];
128 *pcchReg = 2 + !!psz[2] + !!psz[3];
129 return psz;
130 }
131
132 case USE_REG_FP:
133 {
134 Assert(pParam->base.reg_fp < RT_ELEMENTS(g_aszYasmRegFP));
135 const char *psz = g_aszYasmRegFP[pParam->base.reg_fp];
136 *pcchReg = 3;
137 return psz;
138 }
139
140 case USE_REG_MMX:
141 {
142 Assert(pParam->base.reg_mmx < RT_ELEMENTS(g_aszYasmRegMMX));
143 const char *psz = g_aszYasmRegMMX[pParam->base.reg_mmx];
144 *pcchReg = 3;
145 return psz;
146 }
147
148 case USE_REG_XMM:
149 {
150 Assert(pParam->base.reg_xmm < RT_ELEMENTS(g_aszYasmRegXMM));
151 const char *psz = g_aszYasmRegXMM[pParam->base.reg_mmx];
152 *pcchReg = 4 + !!psz[4];
153 return psz;
154 }
155
156 case USE_REG_CR:
157 {
158 Assert(pParam->base.reg_ctrl < RT_ELEMENTS(g_aszYasmRegCRx));
159 const char *psz = g_aszYasmRegCRx[pParam->base.reg_ctrl];
160 *pcchReg = 3;
161 return psz;
162 }
163
164 case USE_REG_DBG:
165 {
166 Assert(pParam->base.reg_dbg < RT_ELEMENTS(g_aszYasmRegDRx));
167 const char *psz = g_aszYasmRegDRx[pParam->base.reg_dbg];
168 *pcchReg = 3;
169 return psz;
170 }
171
172 case USE_REG_SEG:
173 {
174 Assert(pParam->base.reg_seg < (DIS_SELREG)RT_ELEMENTS(g_aszYasmRegCRx));
175 const char *psz = g_aszYasmRegSeg[pParam->base.reg_seg];
176 *pcchReg = 2;
177 return psz;
178 }
179
180 case USE_REG_TEST:
181 {
182 Assert(pParam->base.reg_test < RT_ELEMENTS(g_aszYasmRegTRx));
183 const char *psz = g_aszYasmRegTRx[pParam->base.reg_test];
184 *pcchReg = 3;
185 return psz;
186 }
187
188 default:
189 AssertMsgFailed(("%#x\n", pParam->flags));
190 *pcchReg = 3;
191 return "r??";
192 }
193}
194
195
196/**
197 * Gets the index register name for the given parameter.
198 *
199 * @returns The index register name.
200 * @param pCpu The disassembler cpu state.
201 * @param pParam The parameter.
202 * @param pcchReg Where to store the length of the name.
203 */
204static const char *disasmFormatYasmIndexReg(PCDISCPUSTATE pCpu, PCOP_PARAMETER pParam, size_t *pcchReg)
205{
206 switch (pCpu->addrmode)
207 {
208 case CPUMODE_16BIT:
209 {
210 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
211 const char *psz = g_aszYasmRegGen16[pParam->index.reg_gen];
212 *pcchReg = 2 + !!psz[2] + !!psz[3];
213 return psz;
214 }
215
216 case CPUMODE_32BIT:
217 {
218 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
219 const char *psz = g_aszYasmRegGen32[pParam->index.reg_gen];
220 *pcchReg = 2 + !!psz[2] + !!psz[3];
221 return psz;
222 }
223
224 case CPUMODE_64BIT:
225 {
226 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
227 const char *psz = g_aszYasmRegGen64[pParam->index.reg_gen];
228 *pcchReg = 2 + !!psz[2] + !!psz[3];
229 return psz;
230 }
231
232 default:
233 AssertMsgFailed(("%#x %#x\n", pParam->flags, pCpu->addrmode));
234 *pcchReg = 3;
235 return "r??";
236 }
237}
238
239
240/**
241 * Formats the current instruction in Yasm (/ Nasm) style.
242 *
243 *
244 * @returns The number of output characters. If this is >= cchBuf, then the content
245 * of pszBuf will be truncated.
246 * @param pCpu Pointer to the disassembler CPU state.
247 * @param pszBuf The output buffer.
248 * @param cchBuf The size of the output buffer.
249 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
250 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
251 * @param pvUser User argument for pfnGetSymbol.
252 */
253DISDECL(size_t) DISFormatYasmEx(PCDISCPUSTATE pCpu, char *pszBuf, size_t cchBuf, uint32_t fFlags,
254 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
255{
256 /*
257 * Input validation and massaging.
258 */
259 AssertPtr(pCpu);
260 AssertPtrNull(pszBuf);
261 Assert(pszBuf || !cchBuf);
262 AssertPtrNull(pfnGetSymbol);
263 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
264 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
265 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
266 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
267 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
268
269 PCOPCODE const pOp = pCpu->pCurInstr;
270
271 /*
272 * Output macros
273 */
274 char *pszDst = pszBuf;
275 size_t cchDst = cchBuf;
276 size_t cchOutput = 0;
277#define PUT_C(ch) \
278 do { \
279 cchOutput++; \
280 if (cchDst > 1) \
281 { \
282 cchDst--; \
283 *pszDst++ = (ch); \
284 } \
285 } while (0)
286#define PUT_STR(pszSrc, cchSrc) \
287 do { \
288 cchOutput += (cchSrc); \
289 if (cchDst > (cchSrc)) \
290 { \
291 memcpy(pszDst, (pszSrc), (cchSrc)); \
292 pszDst += (cchSrc); \
293 cchDst -= (cchSrc); \
294 } \
295 else if (cchDst > 1) \
296 { \
297 memcpy(pszDst, (pszSrc), cchDst - 1); \
298 pszDst += cchDst - 1; \
299 cchDst = 1; \
300 } \
301 } while (0)
302#define PUT_SZ(sz) \
303 PUT_STR((sz), sizeof(sz) - 1)
304#define PUT_SZ_STRICT(szStrict, szRelaxed) \
305 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
306#define PUT_PSZ(psz) \
307 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
308#define PUT_NUM(cch, fmt, num) \
309 do { \
310 cchOutput += (cch); \
311 if (cchDst > 1) \
312 { \
313 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
314 pszDst += cchTmp; \
315 cchDst -= cchTmp; \
316 Assert(cchTmp == (cch) || cchDst == 1); \
317 } \
318 } while (0)
319/** @todo add two flags for choosing between %X / %x and h / 0x. */
320#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
321#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
322#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
323#define PUT_NUM_64(num) PUT_NUM(18, "0%016RX64h", (uint64_t)(num))
324
325#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
326 do { \
327 if ((stype)(num) >= 0) \
328 { \
329 PUT_C('+'); \
330 PUT_NUM(cch, fmt, (utype)(num)); \
331 } \
332 else \
333 { \
334 PUT_C('-'); \
335 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
336 } \
337 } while (0)
338#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0%02xh", num, int8_t, uint8_t)
339#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0%04xh", num, int16_t, uint16_t)
340#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0%08xh", num, int32_t, uint32_t)
341#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0%016RX64h", num, int64_t, uint64_t)
342
343
344 /*
345 * The address?
346 */
347 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
348 {
349#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
350 if (pCpu->opaddr >= _4G)
351 PUT_NUM(9, "%08x`", (uint32_t)(pCpu->opaddr >> 32));
352#endif
353 PUT_NUM(8, "%08x", (uint32_t)pCpu->opaddr);
354 PUT_C(' ');
355 }
356
357 /*
358 * The opcode bytes?
359 */
360 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
361 {
362 size_t cchTmp = disFormatBytes(pCpu, pszDst, cchDst, fFlags);
363 cchOutput += cchTmp;
364 if (cchDst > 1)
365 {
366 if (cchTmp <= cchDst)
367 {
368 cchDst -= cchTmp;
369 pszDst += cchTmp;
370 }
371 else
372 {
373 pszDst += cchDst - 1;
374 cchDst = 1;
375 }
376 }
377
378 /* Some padding to align the instruction. */
379 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
380 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
381 + 2;
382 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
383 PUT_STR(g_szSpaces, cchPadding);
384 }
385
386
387 /*
388 * Filter out invalid opcodes first as they need special
389 * treatment. UD2 is an exception and should be handled normally.
390 */
391 size_t const offInstruction = cchOutput;
392 if ( pOp->opcode == OP_INVALID
393 || ( pOp->opcode == OP_ILLUD2
394 && (pCpu->prefix & PREFIX_LOCK)))
395 {
396
397 }
398 else
399 {
400 /*
401 * Prefixes
402 */
403 if (pCpu->prefix & PREFIX_LOCK)
404 PUT_SZ("lock ");
405 if(pCpu->prefix & PREFIX_REP)
406 PUT_SZ("rep ");
407 else if(pCpu->prefix & PREFIX_REPNE)
408 PUT_SZ("repne ");
409
410 /*
411 * Adjust the format string to the correct mnemonic
412 * or to avoid things the assembler cannot handle correctly.
413 */
414 char szTmpFmt[48];
415 const char *pszFmt = pOp->pszOpcode;
416 switch (pOp->opcode)
417 {
418 case OP_JECXZ:
419 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "jcxz %Jb" : pCpu->opmode == CPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
420 break;
421 case OP_PUSHF:
422 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "pushfw" : pCpu->opmode == CPUMODE_32BIT ? "pushfd" : "pushfq";
423 break;
424 case OP_POPF:
425 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "popfw" : pCpu->opmode == CPUMODE_32BIT ? "popfd" : "popfq";
426 break;
427 case OP_PUSHA:
428 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "pushaw" : "pushad";
429 break;
430 case OP_POPA:
431 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "popaw" : "popad";
432 break;
433 case OP_INSB:
434 pszFmt = "insb";
435 break;
436 case OP_INSWD:
437 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "insw" : pCpu->opmode == CPUMODE_32BIT ? "insd" : "insq";
438 break;
439 case OP_OUTSB:
440 pszFmt = "outsb";
441 break;
442 case OP_OUTSWD:
443 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "outsw" : pCpu->opmode == CPUMODE_32BIT ? "outsd" : "outsq";
444 break;
445 case OP_MOVSB:
446 pszFmt = "movsb";
447 break;
448 case OP_MOVSWD:
449 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "movsw" : pCpu->opmode == CPUMODE_32BIT ? "movsd" : "movsq";
450 break;
451 case OP_CMPSB:
452 pszFmt = "cmpsb";
453 break;
454 case OP_CMPWD:
455 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cmpsw" : pCpu->opmode == CPUMODE_32BIT ? "cmpsd" : "cmpsq";
456 break;
457 case OP_SCASB:
458 pszFmt = "scasb";
459 break;
460 case OP_SCASWD:
461 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "scasw" : pCpu->opmode == CPUMODE_32BIT ? "scasd" : "scasq";
462 break;
463 case OP_LODSB:
464 pszFmt = "lodsb";
465 break;
466 case OP_LODSWD:
467 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "lodsw" : pCpu->opmode == CPUMODE_32BIT ? "lodsd" : "lodsq";
468 break;
469 case OP_STOSB:
470 pszFmt = "stosb";
471 break;
472 case OP_STOSWD:
473 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "stosw" : pCpu->opmode == CPUMODE_32BIT ? "stosd" : "stosq";
474 break;
475 case OP_CBW:
476 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cbw" : pCpu->opmode == CPUMODE_32BIT ? "cwde" : "cdqe";
477 break;
478 case OP_CWD:
479 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cwd" : pCpu->opmode == CPUMODE_32BIT ? "cdq" : "cqo";
480 break;
481 case OP_SHL:
482 Assert(pszFmt[3] == '/');
483 pszFmt += 4;
484 break;
485 case OP_XLAT:
486 pszFmt = "xlatb";
487 break;
488 case OP_INT3:
489 pszFmt = "int3";
490 break;
491
492 /*
493 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
494 */
495 case OP_NOP:
496 if (pCpu->opcode == 0x90)
497 /* fine, fine */;
498 else if (pszFmt[sizeof("nop %Ev") - 1] == '/' && pszFmt[sizeof("nop %Ev")] == 'p')
499 pszFmt = "prefetch %Eb";
500 else if (pCpu->opcode == 0x1f)
501 {
502 Assert(pCpu->opsize >= 3);
503 PUT_SZ("db 00fh, 01fh,");
504 PUT_NUM_8(pCpu->ModRM.u);
505 for (unsigned i = 3; i < pCpu->opsize; i++)
506 {
507 PUT_C(',');
508 PUT_NUM_8(0x90); ///@todo fixme.
509 }
510 pszFmt = "";
511 }
512 break;
513
514 default:
515 /* ST(X) -> stX (floating point) */
516 if (*pszFmt == 'f' && strchr(pszFmt, '('))
517 {
518 char *pszFmtDst = szTmpFmt;
519 char ch;
520 do
521 {
522 ch = *pszFmt++;
523 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
524 {
525 *pszFmtDst++ = 's';
526 *pszFmtDst++ = 't';
527 pszFmt += 2;
528 ch = *pszFmt;
529 Assert(pszFmt[1] == ')');
530 pszFmt += 2;
531 *pszFmtDst++ = ch;
532 }
533 else
534 *pszFmtDst++ = ch;
535 } while (ch != '\0');
536 pszFmt = szTmpFmt;
537 }
538 break;
539
540 /*
541 * Horrible hacks.
542 */
543 case OP_FLD:
544 if (pCpu->opcode == 0xdb) /* m80fp workaround. */
545 *(int *)&pCpu->param1.param &= ~0x1f; /* make it pure OP_PARM_M */
546 break;
547 case OP_LAR: /* hack w -> v, probably not correct. */
548 *(int *)&pCpu->param2.param &= ~0x1f;
549 *(int *)&pCpu->param2.param |= OP_PARM_v;
550 break;
551 }
552
553 /*
554 * Formatting context and associated macros.
555 */
556 PCOP_PARAMETER pParam = &pCpu->param1;
557 int iParam = 1;
558
559#define PUT_FAR() \
560 do { \
561 if ( OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_p \
562 && pOp->opcode != OP_LDS /* table bugs? */ \
563 && pOp->opcode != OP_LES \
564 && pOp->opcode != OP_LFS \
565 && pOp->opcode != OP_LGS \
566 && pOp->opcode != OP_LSS ) \
567 PUT_SZ("far "); \
568 } while (0)
569 /** @todo mov ah,ch ends up with a byte 'override'... - check if this wasn't fixed. */
    /** @todo drop the word/dword/qword override when the src/dst is a register (except for movsx/movzx). */
571#define PUT_SIZE_OVERRIDE() \
572 do { \
573 switch (OP_PARM_VSUBTYPE(pParam->param)) \
574 { \
575 case OP_PARM_v: \
576 switch (pCpu->opmode) \
577 { \
578 case CPUMODE_16BIT: PUT_SZ("word "); break; \
579 case CPUMODE_32BIT: PUT_SZ("dword "); break; \
580 case CPUMODE_64BIT: PUT_SZ("qword "); break; \
581 default: break; \
582 } \
583 break; \
584 case OP_PARM_b: PUT_SZ("byte "); break; \
585 case OP_PARM_w: PUT_SZ("word "); break; \
586 case OP_PARM_d: PUT_SZ("dword "); break; \
587 case OP_PARM_q: PUT_SZ("qword "); break; \
588 case OP_PARM_dq: \
589 if (OP_PARM_VTYPE(pParam->param) != OP_PARM_W) /* these are 128 bit, pray they are all unambiguous.. */ \
590 PUT_SZ("qword "); \
591 break; \
592 case OP_PARM_p: break; /* see PUT_FAR */ \
593 case OP_PARM_s: if (pParam->flags & USE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
594 case OP_PARM_z: break; \
595 case OP_PARM_NONE: \
596 if ( OP_PARM_VTYPE(pParam->param) == OP_PARM_M \
597 && ((pParam->flags & USE_REG_FP) || pOp->opcode == OP_FLD)) \
598 PUT_SZ("tword "); \
599 break; \
600 default: break; /*no pointer type specified/necessary*/ \
601 } \
602 } while (0)
603 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
604#define PUT_SEGMENT_OVERRIDE() \
605 do { \
606 if (pCpu->prefix & PREFIX_SEG) \
607 PUT_STR(s_szSegPrefix[pCpu->enmPrefixSeg], 3); \
608 } while (0)
609
610
611 /*
         * Segment prefixing for instructions that don't do memory access.
613 */
614 if ( (pCpu->prefix & PREFIX_SEG)
615 && !DIS_IS_EFFECTIVE_ADDR(pCpu->param1.flags)
616 && !DIS_IS_EFFECTIVE_ADDR(pCpu->param2.flags)
617 && !DIS_IS_EFFECTIVE_ADDR(pCpu->param3.flags))
618 {
619 PUT_STR(s_szSegPrefix[pCpu->enmPrefixSeg], 2);
620 PUT_C(' ');
621 }
622
623
624 /*
625 * The formatting loop.
626 */
627 RTINTPTR off;
628 char szSymbol[128];
629 char ch;
630 while ((ch = *pszFmt++) != '\0')
631 {
632 if (ch == '%')
633 {
634 ch = *pszFmt++;
635 switch (ch)
636 {
637 /*
638 * ModRM - Register only.
639 */
640 case 'C': /* Control register (ParseModRM / UseModRM). */
641 case 'D': /* Debug register (ParseModRM / UseModRM). */
642 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
643 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
644 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
645 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
646 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
647 {
648 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
649 Assert(!(pParam->flags & (USE_INDEX | USE_SCALE) /* No SIB here... */));
650 Assert(!(pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32)));
651
652 size_t cchReg;
653 const char *pszReg = disasmFormatYasmBaseReg(pCpu, pParam, &cchReg);
654 PUT_STR(pszReg, cchReg);
655 break;
656 }
657
658 /*
659 * ModRM - Register or memory.
660 */
661 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
662 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
663 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
664 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
665 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
666 {
667 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
668
669 PUT_FAR();
670 if (DIS_IS_EFFECTIVE_ADDR(pParam->flags))
671 {
672 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
673 while the register variants deals with 16, 32 & 64 in the normal fashion. */
674 if ( pParam->param != OP_PARM_Ev
675 || pOp->opcode != OP_MOV
676 || ( pOp->param1 != OP_PARM_Sw
677 && pOp->param2 != OP_PARM_Sw))
678 PUT_SIZE_OVERRIDE();
679 PUT_C('[');
680 }
681 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
682 && (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32)))
683 {
684 if ( (pParam->flags & USE_DISPLACEMENT8)
685 && !pParam->disp8)
686 PUT_SZ("byte ");
687 else if ( (pParam->flags & USE_DISPLACEMENT16)
688 && (int8_t)pParam->disp16 == (int16_t)pParam->disp16)
689 PUT_SZ("word ");
690 else if ( (pParam->flags & USE_DISPLACEMENT32)
691 && (int8_t)pParam->disp32 == (int32_t)pParam->disp32)
692 PUT_SZ("dword ");
693 else if ( (pParam->flags & USE_DISPLACEMENT64)
694 && (int8_t)pParam->disp64 == (int64_t)pParam->disp32)
695 PUT_SZ("qword ");
696 }
697 if (DIS_IS_EFFECTIVE_ADDR(pParam->flags))
698 PUT_SEGMENT_OVERRIDE();
699
700 bool fBase = (pParam->flags & USE_BASE) /* When exactly is USE_BASE supposed to be set? disasmModRMReg doesn't set it. */
701 || ( (pParam->flags & (USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64))
702 && !DIS_IS_EFFECTIVE_ADDR(pParam->flags));
703 if (fBase)
704 {
705 size_t cchReg;
706 const char *pszReg = disasmFormatYasmBaseReg(pCpu, pParam, &cchReg);
707 PUT_STR(pszReg, cchReg);
708 }
709
710 if (pParam->flags & USE_INDEX)
711 {
712 if (fBase)
713 PUT_C('+');
714
715 size_t cchReg;
716 const char *pszReg = disasmFormatYasmIndexReg(pCpu, pParam, &cchReg);
717 PUT_STR(pszReg, cchReg);
718
719 if (pParam->flags & USE_SCALE)
720 {
721 PUT_C('*');
722 PUT_C('0' + pParam->scale);
723 }
724 }
725 else
726 Assert(!(pParam->flags & USE_SCALE));
727
728 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
729 {
730 int64_t off2;
731 if (pParam->flags & USE_DISPLACEMENT8)
732 off2 = pParam->disp8;
733 else if (pParam->flags & USE_DISPLACEMENT16)
734 off2 = pParam->disp16;
735 else if (pParam->flags & (USE_DISPLACEMENT32 | USE_RIPDISPLACEMENT32))
736 off2 = pParam->disp32;
737 else if (pParam->flags & USE_DISPLACEMENT64)
738 off2 = pParam->disp64;
739 else
740 {
741 AssertFailed();
742 off2 = 0;
743 }
744
745 if (fBase || (pParam->flags & USE_INDEX))
746 {
747 PUT_C(off2 >= 0 ? '+' : '-');
748 if (off2 < 0)
749 off2 = -off2;
750 }
751 if (pParam->flags & USE_DISPLACEMENT8)
752 PUT_NUM_8( off2);
753 else if (pParam->flags & USE_DISPLACEMENT16)
754 PUT_NUM_16(off2);
755 else if (pParam->flags & USE_DISPLACEMENT32)
756 PUT_NUM_32(off2);
757 else if (pParam->flags & USE_DISPLACEMENT64)
758 PUT_NUM_64(off2);
759 else
760 {
761 PUT_NUM_32(off2);
762 PUT_SZ(" wrt rip"); //??
763 }
764 }
765
766 if (DIS_IS_EFFECTIVE_ADDR(pParam->flags))
767 PUT_C(']');
768 break;
769 }
770
771 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
772 AssertFailed();
773 break;
774
775 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
776 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
777 switch (pParam->flags & ( USE_IMMEDIATE8 | USE_IMMEDIATE16 | USE_IMMEDIATE32 | USE_IMMEDIATE64
778 | USE_IMMEDIATE16_SX8 | USE_IMMEDIATE32_SX8 | USE_IMMEDIATE64_SX8))
779 {
780 case USE_IMMEDIATE8:
781 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
782 && ( (pOp->param1 >= OP_PARM_REG_GEN8_START && pOp->param1 <= OP_PARM_REG_GEN8_END)
783 || (pOp->param2 >= OP_PARM_REG_GEN8_START && pOp->param2 <= OP_PARM_REG_GEN8_END))
784 )
785 PUT_SZ("strict byte ");
786 PUT_NUM_8(pParam->parval);
787 break;
788
789 case USE_IMMEDIATE16:
790 if ( pCpu->mode != pCpu->opmode
791 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
792 && ( (int8_t)pParam->parval == (int16_t)pParam->parval
793 || (pOp->param1 >= OP_PARM_REG_GEN16_START && pOp->param1 <= OP_PARM_REG_GEN16_END)
794 || (pOp->param2 >= OP_PARM_REG_GEN16_START && pOp->param2 <= OP_PARM_REG_GEN16_END))
795 )
796 )
797 {
798 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
799 PUT_SZ_STRICT("strict byte ", "byte ");
800 else if ( OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v
801 || OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_z)
802 PUT_SZ_STRICT("strict word ", "word ");
803 }
804 PUT_NUM_16(pParam->parval);
805 break;
806
807 case USE_IMMEDIATE16_SX8:
808 PUT_SZ_STRICT("strict byte ", "byte ");
809 PUT_NUM_16(pParam->parval);
810 break;
811
812 case USE_IMMEDIATE32:
813 if ( pCpu->opmode != (pCpu->mode == CPUMODE_16BIT ? CPUMODE_16BIT : CPUMODE_32BIT) /* not perfect */
814 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
815 && ( (int8_t)pParam->parval == (int32_t)pParam->parval
816 || (pOp->param1 >= OP_PARM_REG_GEN32_START && pOp->param1 <= OP_PARM_REG_GEN32_END)
817 || (pOp->param2 >= OP_PARM_REG_GEN32_START && pOp->param2 <= OP_PARM_REG_GEN32_END))
818 )
819 )
820 {
821 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
822 PUT_SZ_STRICT("strict byte ", "byte ");
823 else if ( OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v
824 || OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_z)
825 PUT_SZ_STRICT("strict dword ", "dword ");
826 }
827 PUT_NUM_32(pParam->parval);
828 break;
829
830 case USE_IMMEDIATE32_SX8:
831 PUT_SZ_STRICT("strict byte ", "byte ");
832 PUT_NUM_32(pParam->parval);
833 break;
834
835 case USE_IMMEDIATE64_SX8:
836 PUT_SZ_STRICT("strict byte ", "byte ");
837 PUT_NUM_64(pParam->parval);
838 break;
839
840 case USE_IMMEDIATE64:
841 PUT_NUM_64(pParam->parval);
842 break;
843
844 default:
845 AssertFailed();
846 break;
847 }
848 break;
849
850 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
851 {
852 int32_t offDisplacement;
853 Assert(iParam == 1);
854 bool fPrefix = (fFlags & DIS_FMT_FLAGS_STRICT)
855 && pOp->opcode != OP_CALL
856 && pOp->opcode != OP_LOOP
857 && pOp->opcode != OP_LOOPE
858 && pOp->opcode != OP_LOOPNE
859 && pOp->opcode != OP_JECXZ;
860 if (pOp->opcode == OP_CALL)
861 fFlags &= ~DIS_FMT_FLAGS_RELATIVE_BRANCH;
862
863 if (pParam->flags & USE_IMMEDIATE8_REL)
864 {
865 if (fPrefix)
866 PUT_SZ("short ");
867 offDisplacement = (int8_t)pParam->parval;
868 Assert(*pszFmt == 'b'); pszFmt++;
869
870 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
871 PUT_NUM_S8(offDisplacement);
872 }
873 else if (pParam->flags & USE_IMMEDIATE16_REL)
874 {
875 if (fPrefix)
876 PUT_SZ("near ");
877 offDisplacement = (int16_t)pParam->parval;
878 Assert(*pszFmt == 'v'); pszFmt++;
879
880 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
881 PUT_NUM_S16(offDisplacement);
882 }
883 else
884 {
885 if (fPrefix)
886 PUT_SZ("near ");
887 offDisplacement = (int32_t)pParam->parval;
888 Assert(pParam->flags & (USE_IMMEDIATE32_REL|USE_IMMEDIATE64_REL));
889 Assert(*pszFmt == 'v'); pszFmt++;
890
891 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
892 PUT_NUM_S32(offDisplacement);
893 }
894 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
895 PUT_SZ(" (");
896
897 RTUINTPTR uTrgAddr = pCpu->opaddr + pCpu->opsize + offDisplacement;
898 if (pCpu->mode == CPUMODE_16BIT)
899 PUT_NUM_16(uTrgAddr);
900 else if (pCpu->mode == CPUMODE_32BIT)
901 PUT_NUM_32(uTrgAddr);
902 else
903 PUT_NUM_64(uTrgAddr);
904
905 if (pfnGetSymbol)
906 {
907 int rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), uTrgAddr, szSymbol, sizeof(szSymbol), &off, pvUser);
908 if (RT_SUCCESS(rc))
909 {
910 PUT_SZ(" [");
911 PUT_PSZ(szSymbol);
912 if (off != 0)
913 {
914 if ((int8_t)off == off)
915 PUT_NUM_S8(off);
916 else if ((int16_t)off == off)
917 PUT_NUM_S16(off);
918 else if ((int32_t)off == off)
919 PUT_NUM_S32(off);
920 else
921 PUT_NUM_S64(off);
922 }
923 PUT_C(']');
924 }
925 }
926
927 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
928 PUT_C(')');
929 break;
930 }
931
932 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
933 {
934 Assert(*pszFmt == 'p'); pszFmt++;
935 PUT_FAR();
936 PUT_SIZE_OVERRIDE();
937 PUT_SEGMENT_OVERRIDE();
938 int rc = VERR_SYMBOL_NOT_FOUND;
939 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
940 {
941 case USE_IMMEDIATE_ADDR_16_16:
942 PUT_NUM_16(pParam->parval >> 16);
943 PUT_C(':');
944 PUT_NUM_16(pParam->parval);
945 if (pfnGetSymbol)
946 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_VALUE(pParam->parval >> 16), (uint16_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
947 break;
948 case USE_IMMEDIATE_ADDR_16_32:
949 PUT_NUM_16(pParam->parval >> 32);
950 PUT_C(':');
951 PUT_NUM_32(pParam->parval);
952 if (pfnGetSymbol)
953 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_VALUE(pParam->parval >> 16), (uint32_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
954 break;
955 case USE_DISPLACEMENT16:
956 PUT_NUM_16(pParam->parval);
957 if (pfnGetSymbol)
958 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), (uint16_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
959 break;
960 case USE_DISPLACEMENT32:
961 PUT_NUM_32(pParam->parval);
962 if (pfnGetSymbol)
963 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), (uint32_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
964 break;
965 case USE_DISPLACEMENT64:
966 PUT_NUM_64(pParam->parval);
967 if (pfnGetSymbol)
968 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), (uint64_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
969 break;
970 default:
971 AssertFailed();
972 break;
973 }
974
975 if (RT_SUCCESS(rc))
976 {
977 PUT_SZ(" [");
978 PUT_PSZ(szSymbol);
979 if (off != 0)
980 {
981 if ((int8_t)off == off)
982 PUT_NUM_S8(off);
983 else if ((int16_t)off == off)
984 PUT_NUM_S16(off);
985 else if ((int32_t)off == off)
986 PUT_NUM_S32(off);
987 else
988 PUT_NUM_S64(off);
989 }
990 PUT_C(']');
991 }
992 break;
993 }
994
995 case 'O': /* No ModRM byte (ParseImmAddr). */
996 {
997 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
998 PUT_FAR();
999 PUT_SIZE_OVERRIDE();
1000 PUT_C('[');
1001 PUT_SEGMENT_OVERRIDE();
1002 int rc = VERR_SYMBOL_NOT_FOUND;
1003 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
1004 {
1005 case USE_IMMEDIATE_ADDR_16_16:
1006 PUT_NUM_16(pParam->parval >> 16);
1007 PUT_C(':');
1008 PUT_NUM_16(pParam->parval);
1009 if (pfnGetSymbol)
1010 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_VALUE(pParam->parval >> 16), (uint16_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
1011 break;
1012 case USE_IMMEDIATE_ADDR_16_32:
1013 PUT_NUM_16(pParam->parval >> 32);
1014 PUT_C(':');
1015 PUT_NUM_32(pParam->parval);
1016 if (pfnGetSymbol)
1017 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_VALUE(pParam->parval >> 16), (uint32_t)pParam->parval, szSymbol, sizeof(szSymbol), &off, pvUser);
1018 break;
1019 case USE_DISPLACEMENT16:
1020 PUT_NUM_16(pParam->disp16);
1021 if (pfnGetSymbol)
1022 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), (uint16_t)pParam->disp16, szSymbol, sizeof(szSymbol), &off, pvUser);
1023 break;
1024 case USE_DISPLACEMENT32:
1025 PUT_NUM_32(pParam->disp32);
1026 if (pfnGetSymbol)
1027 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), (uint32_t)pParam->disp32, szSymbol, sizeof(szSymbol), &off, pvUser);
1028 break;
1029 case USE_DISPLACEMENT64:
1030 PUT_NUM_64(pParam->disp64);
1031 if (pfnGetSymbol)
1032 rc = pfnGetSymbol(pCpu, DIS_FMT_SEL_FROM_REG(DIS_SELREG_CS), (uint64_t)pParam->disp64, szSymbol, sizeof(szSymbol), &off, pvUser);
1033 break;
1034 default:
1035 AssertFailed();
1036 break;
1037 }
1038 PUT_C(']');
1039
1040 if (RT_SUCCESS(rc))
1041 {
1042 PUT_SZ(" (");
1043 PUT_PSZ(szSymbol);
1044 if (off != 0)
1045 {
1046 if ((int8_t)off == off)
1047 PUT_NUM_S8(off);
1048 else if ((int16_t)off == off)
1049 PUT_NUM_S16(off);
1050 else if ((int32_t)off == off)
1051 PUT_NUM_S32(off);
1052 else
1053 PUT_NUM_S64(off);
1054 }
1055 PUT_C(')');
1056 }
1057 break;
1058 }
1059
1060 case 'X': /* DS:SI (ParseXb, ParseXv). */
1061 case 'Y': /* ES:DI (ParseYb, ParseYv). */
1062 {
1063 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1064 PUT_FAR();
1065 PUT_SIZE_OVERRIDE();
1066 PUT_C('[');
1067 if (pParam->flags & USE_POINTER_DS_BASED)
1068 PUT_SZ("ds:");
1069 else
1070 PUT_SZ("es:");
1071
1072 size_t cchReg;
1073 const char *pszReg = disasmFormatYasmBaseReg(pCpu, pParam, &cchReg);
1074 PUT_STR(pszReg, cchReg);
1075 PUT_C(']');
1076 break;
1077 }
1078
1079 case 'e': /* Register based on operand size (e.g. %eAX) (ParseFixedReg). */
1080 {
1081 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2])); pszFmt += 2;
1082 size_t cchReg;
1083 const char *pszReg = disasmFormatYasmBaseReg(pCpu, pParam, &cchReg);
1084 PUT_STR(pszReg, cchReg);
1085 break;
1086 }
1087
1088 default:
1089 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
1090 break;
1091 }
1092 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
1093 }
1094 else
1095 {
1096 PUT_C(ch);
1097 if (ch == ',')
1098 {
1099 Assert(*pszFmt != ' ');
1100 PUT_C(' ');
1101 switch (++iParam)
1102 {
1103 case 2: pParam = &pCpu->param2; break;
1104 case 3: pParam = &pCpu->param3; break;
1105 default: pParam = NULL; break;
1106 }
1107 }
1108 }
1109 } /* while more to format */
1110 }
1111
1112 /*
1113 * Any additional output to the right of the instruction?
1114 */
1115 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1116 {
1117 /* some up front padding. */
1118 size_t cchPadding = cchOutput - offInstruction;
1119 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
1120 PUT_STR(g_szSpaces, cchPadding);
1121
1122 /* comment? */
1123 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1124 PUT_SZ(";");
1125
1126 /*
1127 * The address?
1128 */
1129 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
1130 {
1131 PUT_C(' ');
1132#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
1133 if (pCpu->opaddr >= _4G)
1134 PUT_NUM(9, "%08x`", (uint32_t)(pCpu->opaddr >> 32));
1135#endif
1136 PUT_NUM(8, "%08x", (uint32_t)pCpu->opaddr);
1137 }
1138
1139 /*
1140 * Opcode bytes?
1141 */
1142 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
1143 {
1144 PUT_C(' ');
1145 size_t cchTmp = disFormatBytes(pCpu, pszDst, cchDst, fFlags);
1146 cchOutput += cchTmp;
1147 if (cchTmp >= cchDst)
1148 cchTmp = cchDst - (cchDst != 0);
1149 cchDst -= cchTmp;
1150 pszDst += cchTmp;
1151 }
1152 }
1153
1154 /*
1155 * Terminate it - on overflow we'll have reserved one byte for this.
1156 */
1157 if (cchDst > 0)
1158 *pszDst = '\0';
1159 else
1160 Assert(!cchBuf);
1161
1162 /* clean up macros */
1163#undef PUT_PSZ
1164#undef PUT_SZ
1165#undef PUT_STR
1166#undef PUT_C
1167 return cchOutput;
1168}
1169
1170
1171/**
1172 * Formats the current instruction in Yasm (/ Nasm) style.
1173 *
1174 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
1175 *
1176 *
1177 * @returns The number of output characters. If this is >= cchBuf, then the content
1178 * of pszBuf will be truncated.
1179 * @param pCpu Pointer to the disassembler CPU state.
1180 * @param pszBuf The output buffer.
1181 * @param cchBuf The size of the output buffer.
1182 */
1183DISDECL(size_t) DISFormatYasm(PCDISCPUSTATE pCpu, char *pszBuf, size_t cchBuf)
1184{
1185 return DISFormatYasmEx(pCpu, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
1186}
1187
1188
1189/**
1190 * Checks if the encoding of the given disassembled instruction is something we
1191 * can never get YASM to produce.
1192 *
1193 * @returns true if it's odd, false if it isn't.
1194 * @param pCpu The disassembler output. The byte fetcher callback will
1195 * be used if present as we might need to fetch opcode
1196 * bytes.
1197 */
1198DISDECL(bool) DISFormatYasmIsOddEncoding(PDISCPUSTATE pCpu)
1199{
1200 /*
1201 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
1202 */
1203 if ( pCpu->addrmode != CPUMODE_16BIT ///@todo correct?
1204 && pCpu->ModRM.Bits.Rm == 4
1205 && pCpu->ModRM.Bits.Mod != 3)
1206 {
1207 /* No scaled index SIB (index=4), except for ESP. */
1208 if ( pCpu->SIB.Bits.Index == 4
1209 && pCpu->SIB.Bits.Base != 4)
1210 return true;
1211
1212 /* EBP + displacement */
1213 if ( pCpu->ModRM.Bits.Mod != 0
1214 && pCpu->SIB.Bits.Base == 5
1215 && pCpu->SIB.Bits.Scale == 0)
1216 return true;
1217 }
1218
1219 /*
1220 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1221 */
1222 if ( pCpu->pCurInstr->opcode == OP_SHL
1223 && pCpu->ModRM.Bits.Reg == 6)
1224 return true;
1225
1226 /*
1227 * Check for multiple prefixes of the same kind.
1228 */
1229 uint32_t fPrefixes = 0;
1230 for (uint32_t offOpcode = 0; offOpcode < 32; offOpcode++)
1231 {
1232 uint32_t f;
1233 switch (DISReadByte(pCpu, offOpcode + pCpu->opaddr))
1234 {
1235 case 0xf0:
1236 f = PREFIX_LOCK;
1237 break;
1238
1239 case 0xf2:
1240 case 0xf3:
1241 f = PREFIX_REP; /* yes, both */
1242 break;
1243
1244 case 0x2e:
1245 case 0x3e:
1246 case 0x26:
1247 case 0x36:
1248 case 0x64:
1249 case 0x65:
1250 f = PREFIX_SEG;
1251 break;
1252
1253 case 0x66:
1254 f = PREFIX_OPSIZE;
1255 break;
1256
1257 case 0x67:
1258 f = PREFIX_ADDRSIZE;
1259 break;
1260
1261 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1262 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1263 f = pCpu->mode == CPUMODE_64BIT ? PREFIX_REX : 0;
1264 break;
1265
1266 default:
1267 f = 0;
1268 break;
1269 }
1270 if (!f)
1271 break; /* done */
1272 if (fPrefixes & f)
1273 return true;
1274 fPrefixes |= f;
1275 }
1276
1277 /* segment overrides are fun */
1278 if (fPrefixes & PREFIX_SEG)
1279 {
1280 /* no effective address which it may apply to. */
1281 Assert((pCpu->prefix & PREFIX_SEG) || pCpu->mode == CPUMODE_64BIT);
1282 if ( !DIS_IS_EFFECTIVE_ADDR(pCpu->param1.flags)
1283 && !DIS_IS_EFFECTIVE_ADDR(pCpu->param2.flags)
1284 && !DIS_IS_EFFECTIVE_ADDR(pCpu->param3.flags))
1285 return true;
1286 }
1287
1288 /* fixed register + addr override doesn't go down all that well. */
1289 if (fPrefixes & PREFIX_ADDRSIZE)
1290 {
1291 Assert(pCpu->prefix & PREFIX_ADDRSIZE);
1292 if ( pCpu->pCurInstr->param3 == OP_PARM_NONE
1293 && pCpu->pCurInstr->param2 == OP_PARM_NONE
1294 && ( pCpu->pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1295 && pCpu->pCurInstr->param1 <= OP_PARM_REG_GEN32_END))
1296 return true;
1297 }
1298
1299 /* Almost all prefixes are bad. */
1300 if (fPrefixes)
1301 {
1302 switch (pCpu->pCurInstr->opcode)
1303 {
1304 /* nop w/ prefix(es). */
1305 case OP_NOP:
1306 return true;
1307
1308 case OP_JMP:
1309 if ( pCpu->pCurInstr->param1 != OP_PARM_Jb
1310 && pCpu->pCurInstr->param1 != OP_PARM_Jv)
1311 break;
1312 /* fall thru */
1313 case OP_JO:
1314 case OP_JNO:
1315 case OP_JC:
1316 case OP_JNC:
1317 case OP_JE:
1318 case OP_JNE:
1319 case OP_JBE:
1320 case OP_JNBE:
1321 case OP_JS:
1322 case OP_JNS:
1323 case OP_JP:
1324 case OP_JNP:
1325 case OP_JL:
1326 case OP_JNL:
1327 case OP_JLE:
1328 case OP_JNLE:
1329 /** @todo branch hinting 0x2e/0x3e... */
1330 return true;
1331 }
1332
1333 }
1334
1335 /* All but the segment prefix is bad news. */
1336 if (fPrefixes & ~PREFIX_SEG)
1337 {
1338 switch (pCpu->pCurInstr->opcode)
1339 {
1340 case OP_POP:
1341 case OP_PUSH:
1342 if ( pCpu->pCurInstr->param1 >= OP_PARM_REG_SEG_START
1343 && pCpu->pCurInstr->param1 <= OP_PARM_REG_SEG_END)
1344 return true;
1345 if ( (fPrefixes & ~PREFIX_OPSIZE)
1346 && pCpu->pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1347 && pCpu->pCurInstr->param1 <= OP_PARM_REG_GEN32_END)
1348 return true;
1349 break;
1350
1351 case OP_POPA:
1352 case OP_POPF:
1353 case OP_PUSHA:
1354 case OP_PUSHF:
1355 if (fPrefixes & ~PREFIX_OPSIZE)
1356 return true;
1357 break;
1358 }
1359 }
1360
1361 /* Implicit 8-bit register instructions doesn't mix with operand size. */
1362 if ( (fPrefixes & PREFIX_OPSIZE)
1363 && ( ( pCpu->pCurInstr->param1 == OP_PARM_Gb /* r8 */
1364 && pCpu->pCurInstr->param2 == OP_PARM_Eb /* r8/mem8 */)
1365 || ( pCpu->pCurInstr->param2 == OP_PARM_Gb /* r8 */
1366 && pCpu->pCurInstr->param1 == OP_PARM_Eb /* r8/mem8 */))
1367 )
1368 {
1369 switch (pCpu->pCurInstr->opcode)
1370 {
1371 case OP_ADD:
1372 case OP_OR:
1373 case OP_ADC:
1374 case OP_SBB:
1375 case OP_AND:
1376 case OP_SUB:
1377 case OP_XOR:
1378 case OP_CMP:
1379 return true;
1380 default:
1381 break;
1382 }
1383 }
1384
1385
1386 /*
1387 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1388 *
1389 * For example:
1390 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1391 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1392 */
1393 if (pCpu->ModRM.Bits.Mod == 3 /* reg,reg */)
1394 {
1395 switch (pCpu->pCurInstr->opcode)
1396 {
1397 case OP_ADD:
1398 case OP_OR:
1399 case OP_ADC:
1400 case OP_SBB:
1401 case OP_AND:
1402 case OP_SUB:
1403 case OP_XOR:
1404 case OP_CMP:
1405 if ( ( pCpu->pCurInstr->param1 == OP_PARM_Gb /* r8 */
1406 && pCpu->pCurInstr->param2 == OP_PARM_Eb /* r8/mem8 */)
1407 || ( pCpu->pCurInstr->param1 == OP_PARM_Gv /* rX */
1408 && pCpu->pCurInstr->param2 == OP_PARM_Ev /* rX/memX */))
1409 return true;
1410
1411 /* 82 (see table A-6). */
1412 if (pCpu->opcode == 0x82)
1413 return true;
1414 break;
1415
1416 /* ff /0, fe /0, ff /1, fe /0 */
1417 case OP_DEC:
1418 case OP_INC:
1419 return true;
1420
1421 case OP_POP:
1422 case OP_PUSH:
1423 Assert(pCpu->opcode == 0x8f);
1424 return true;
1425
1426 case OP_MOV:
1427 if ( pCpu->opcode == 0x8a
1428 || pCpu->opcode == 0x8b)
1429 return true;
1430 break;
1431
1432 default:
1433 break;
1434 }
1435 }
1436
1437 /* shl eax,1 will be assembled to the form without the immediate byte. */
1438 if ( pCpu->pCurInstr->param2 == OP_PARM_Ib
1439 && (uint8_t)pCpu->param2.parval == 1)
1440 {
1441 switch (pCpu->pCurInstr->opcode)
1442 {
1443 case OP_SHL:
1444 case OP_SHR:
1445 case OP_SAR:
1446 case OP_RCL:
1447 case OP_RCR:
1448 case OP_ROL:
1449 case OP_ROR:
1450 return true;
1451 }
1452 }
1453
1454 /* And some more - see table A-6. */
1455 if (pCpu->opcode == 0x82)
1456 {
1457 switch (pCpu->pCurInstr->opcode)
1458 {
1459 case OP_ADD:
1460 case OP_OR:
1461 case OP_ADC:
1462 case OP_SBB:
1463 case OP_AND:
1464 case OP_SUB:
1465 case OP_XOR:
1466 case OP_CMP:
1467 return true;
1468 break;
1469 }
1470 }
1471
1472
1473 /* check for REX.X = 1 without SIB. */
1474
1475 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1476 says (intel doesn't appear to care). */
1477 switch (pCpu->pCurInstr->opcode)
1478 {
1479 case OP_SETO:
1480 case OP_SETNO:
1481 case OP_SETC:
1482 case OP_SETNC:
1483 case OP_SETE:
1484 case OP_SETNE:
1485 case OP_SETBE:
1486 case OP_SETNBE:
1487 case OP_SETS:
1488 case OP_SETNS:
1489 case OP_SETP:
1490 case OP_SETNP:
1491 case OP_SETL:
1492 case OP_SETNL:
1493 case OP_SETLE:
1494 case OP_SETNLE:
1495 AssertMsg(pCpu->opcode >= 0x90 && pCpu->opcode <= 0x9f, ("%#x\n", pCpu->opcode));
1496 if (pCpu->ModRM.Bits.Reg != 2)
1497 return true;
1498 break;
1499 }
1500
1501 /*
1502 * The MOVZX reg32,mem16 instruction without an operand size prefix
1503 * doesn't quite make sense...
1504 */
1505 if ( pCpu->pCurInstr->opcode == OP_MOVZX
1506 && pCpu->opcode == 0xB7
1507 && (pCpu->mode == CPUMODE_16BIT) != !!(fPrefixes & PREFIX_OPSIZE))
1508 return true;
1509
1510 return false;
1511}
1512
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette