VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatYasm.cpp@ 100763

Last change on this file since 100763 was 99220, checked in by vboxsync, 21 months ago

Disassembler,*: Start separating the disassembler into an architecture-specific and a common part, bugref:10394

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 72.2 KB
Line 
1/* $Id: DisasmFormatYasm.cpp 99220 2023-03-30 12:40:46Z vboxsync $ */
2/** @file
3 * VBox Disassembler - Yasm(/Nasm) Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include <VBox/dis.h>
33#include "DisasmInternal.h"
34#include <iprt/assert.h>
35#include <iprt/ctype.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Global Variables *
42*********************************************************************************************************************************/
/**
 * A run of blanks used as the source for column padding.
 *
 * The formatter copies a variable number of bytes out of this array, so it
 * must be at least as long as the widest padding request (the opcode byte
 * column alone may ask for up to 25 blanks).
 */
static const char g_szSpaces[] =
"                                        "
"                                        ";

/*
 * Register name tables.
 *
 * Every table has fixed-width rows.  Names shorter than the row width are
 * padded with NUL bytes, which lets the lookup code compute the name length by
 * probing the trailing bytes of a row instead of calling strlen().
 */

/** 8-bit general registers: legacy al..bh, r8b..r15b, then spl/bpl/sil/dil. */
static const char g_aszYasmRegGen8[20][5] =
{
    "al\0\0", "cl\0\0", "dl\0\0", "bl\0\0", "ah\0\0", "ch\0\0", "dh\0\0", "bh\0\0", "r8b\0", "r9b\0", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b", "spl\0", "bpl\0", "sil\0", "dil\0"
};
/** 16-bit general registers. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax\0\0", "cx\0\0", "dx\0\0", "bx\0\0", "sp\0\0", "bp\0\0", "si\0\0", "di\0\0", "r8w\0", "r9w\0", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
};
#if 0 /* unused */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di", "si\0\0\0", "di\0\0\0", "bp\0\0\0", "bx\0\0\0"
};
#endif
/** 32-bit general registers. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax\0", "ecx\0", "edx\0", "ebx\0", "esp\0", "ebp\0", "esi\0", "edi\0", "r8d\0", "r9d\0", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
};
/** 64-bit general registers. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8\0", "r9\0", "r10", "r11", "r12", "r13", "r14", "r15"
};
/** Segment registers. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};
/** Floating point stack registers. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};
/** MMX registers. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};
/** XMM registers. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0\0", "xmm1\0", "xmm2\0", "xmm3\0", "xmm4\0", "xmm5\0", "xmm6\0", "xmm7\0", "xmm8\0", "xmm9\0", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};
/** YMM registers. */
static const char g_aszYasmRegYMM[16][6] =
{
    "ymm0\0", "ymm1\0", "ymm2\0", "ymm3\0", "ymm4\0", "ymm5\0", "ymm6\0", "ymm7\0", "ymm8\0", "ymm9\0", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
};
/** Control registers. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0\0", "cr1\0", "cr2\0", "cr3\0", "cr4\0", "cr5\0", "cr6\0", "cr7\0", "cr8\0", "cr9\0", "cr10", "cr11", "cr12", "cr13", "cr14", "cr15"
};
/** Debug registers. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0\0", "dr1\0", "dr2\0", "dr3\0", "dr4\0", "dr5\0", "dr6\0", "dr7\0", "dr8\0", "dr9\0", "dr10", "dr11", "dr12", "dr13", "dr14", "dr15"
};
/** Test registers. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0\0", "tr1\0", "tr2\0", "tr3\0", "tr4\0", "tr5\0", "tr6\0", "tr7\0", "tr8\0", "tr9\0", "tr10", "tr11", "tr12", "tr13", "tr14", "tr15"
};
99
100
101
102/**
103 * Gets the base register name for the given parameter.
104 *
105 * @returns Pointer to the register name.
106 * @param pDis The disassembler state.
107 * @param pParam The parameter.
108 * @param pcchReg Where to store the length of the name.
109 */
110static const char *disasmFormatYasmBaseReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
111{
112 RT_NOREF_PV(pDis);
113
114 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
115 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_YMM
116 | DISUSE_REG_CR | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
117
118 {
119 case DISUSE_REG_GEN8:
120 {
121 Assert(pParam->arch.x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen8));
122 const char *psz = g_aszYasmRegGen8[pParam->arch.x86.Base.idxGenReg];
123 *pcchReg = 2 + !!psz[2] + !!psz[3];
124 return psz;
125 }
126
127 case DISUSE_REG_GEN16:
128 {
129 Assert(pParam->arch.x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
130 const char *psz = g_aszYasmRegGen16[pParam->arch.x86.Base.idxGenReg];
131 *pcchReg = 2 + !!psz[2] + !!psz[3];
132 return psz;
133 }
134
135 // VSIB
136 case DISUSE_REG_XMM | DISUSE_REG_GEN32:
137 case DISUSE_REG_YMM | DISUSE_REG_GEN32:
138 case DISUSE_REG_GEN32:
139 {
140 Assert(pParam->arch.x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
141 const char *psz = g_aszYasmRegGen32[pParam->arch.x86.Base.idxGenReg];
142 *pcchReg = 2 + !!psz[2] + !!psz[3];
143 return psz;
144 }
145
146 // VSIB
147 case DISUSE_REG_XMM | DISUSE_REG_GEN64:
148 case DISUSE_REG_YMM | DISUSE_REG_GEN64:
149 case DISUSE_REG_GEN64:
150 {
151 Assert(pParam->arch.x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
152 const char *psz = g_aszYasmRegGen64[pParam->arch.x86.Base.idxGenReg];
153 *pcchReg = 2 + !!psz[2] + !!psz[3];
154 return psz;
155 }
156
157 case DISUSE_REG_FP:
158 {
159 Assert(pParam->arch.x86.Base.idxFpuReg < RT_ELEMENTS(g_aszYasmRegFP));
160 const char *psz = g_aszYasmRegFP[pParam->arch.x86.Base.idxFpuReg];
161 *pcchReg = 3;
162 return psz;
163 }
164
165 case DISUSE_REG_MMX:
166 {
167 Assert(pParam->arch.x86.Base.idxMmxReg < RT_ELEMENTS(g_aszYasmRegMMX));
168 const char *psz = g_aszYasmRegMMX[pParam->arch.x86.Base.idxMmxReg];
169 *pcchReg = 3;
170 return psz;
171 }
172
173 case DISUSE_REG_XMM:
174 {
175 Assert(pParam->arch.x86.Base.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
176 const char *psz = g_aszYasmRegXMM[pParam->arch.x86.Base.idxXmmReg];
177 *pcchReg = 4 + !!psz[4];
178 return psz;
179 }
180
181 case DISUSE_REG_YMM:
182 {
183 Assert(pParam->arch.x86.Base.idxYmmReg < RT_ELEMENTS(g_aszYasmRegYMM));
184 const char *psz = g_aszYasmRegYMM[pParam->arch.x86.Base.idxYmmReg];
185 *pcchReg = 4 + !!psz[4];
186 return psz;
187 }
188
189 case DISUSE_REG_CR:
190 {
191 Assert(pParam->arch.x86.Base.idxCtrlReg < RT_ELEMENTS(g_aszYasmRegCRx));
192 const char *psz = g_aszYasmRegCRx[pParam->arch.x86.Base.idxCtrlReg];
193 *pcchReg = 3;
194 return psz;
195 }
196
197 case DISUSE_REG_DBG:
198 {
199 Assert(pParam->arch.x86.Base.idxDbgReg < RT_ELEMENTS(g_aszYasmRegDRx));
200 const char *psz = g_aszYasmRegDRx[pParam->arch.x86.Base.idxDbgReg];
201 *pcchReg = 3;
202 return psz;
203 }
204
205 case DISUSE_REG_SEG:
206 {
207 Assert(pParam->arch.x86.Base.idxSegReg < RT_ELEMENTS(g_aszYasmRegCRx));
208 const char *psz = g_aszYasmRegSeg[pParam->arch.x86.Base.idxSegReg];
209 *pcchReg = 2;
210 return psz;
211 }
212
213 case DISUSE_REG_TEST:
214 {
215 Assert(pParam->arch.x86.Base.idxTestReg < RT_ELEMENTS(g_aszYasmRegTRx));
216 const char *psz = g_aszYasmRegTRx[pParam->arch.x86.Base.idxTestReg];
217 *pcchReg = 3;
218 return psz;
219 }
220
221 default:
222 AssertMsgFailed(("%#x\n", pParam->fUse));
223 *pcchReg = 3;
224 return "r??";
225 }
226}
227
228
229/**
230 * Gets the index register name for the given parameter.
231 *
232 * @returns The index register name.
233 * @param pDis The disassembler state.
234 * @param pParam The parameter.
235 * @param pcchReg Where to store the length of the name.
236 */
237static const char *disasmFormatYasmIndexReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
238{
239 if (pParam->fUse & DISUSE_REG_XMM)
240 {
241 Assert(pParam->arch.x86.Index.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
242 const char *psz = g_aszYasmRegXMM[pParam->arch.x86.Index.idxXmmReg];
243 *pcchReg = 4 + !!psz[4];
244 return psz;
245 }
246 else if (pParam->fUse & DISUSE_REG_YMM)
247 {
248 Assert(pParam->arch.x86.Index.idxYmmReg < RT_ELEMENTS(g_aszYasmRegYMM));
249 const char *psz = g_aszYasmRegYMM[pParam->arch.x86.Index.idxYmmReg];
250 *pcchReg = 4 + !!psz[4];
251 return psz;
252
253 }
254 else
255 switch (pDis->arch.x86.uAddrMode)
256 {
257 case DISCPUMODE_16BIT:
258 {
259 Assert(pParam->arch.x86.Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
260 const char *psz = g_aszYasmRegGen16[pParam->arch.x86.Index.idxGenReg];
261 *pcchReg = 2 + !!psz[2] + !!psz[3];
262 return psz;
263 }
264
265 case DISCPUMODE_32BIT:
266 {
267 Assert(pParam->arch.x86.Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
268 const char *psz = g_aszYasmRegGen32[pParam->arch.x86.Index.idxGenReg];
269 *pcchReg = 2 + !!psz[2] + !!psz[3];
270 return psz;
271 }
272
273 case DISCPUMODE_64BIT:
274 {
275 Assert(pParam->arch.x86.Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
276 const char *psz = g_aszYasmRegGen64[pParam->arch.x86.Index.idxGenReg];
277 *pcchReg = 2 + !!psz[2] + !!psz[3];
278 return psz;
279 }
280
281 default:
282 AssertMsgFailed(("%#x %#x\n", pParam->fUse, pDis->arch.x86.uAddrMode));
283 *pcchReg = 3;
284 return "r??";
285 }
286}
287
288
289/**
290 * Formats the current instruction in Yasm (/ Nasm) style.
291 *
292 *
293 * @returns The number of output characters. If this is >= cchBuf, then the content
294 * of pszBuf will be truncated.
295 * @param pDis Pointer to the disassembler state.
296 * @param pszBuf The output buffer.
297 * @param cchBuf The size of the output buffer.
298 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
299 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
300 * @param pvUser User argument for pfnGetSymbol.
301 */
302DISDECL(size_t) DISFormatYasmEx(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
303 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
304{
305/** @todo monitor and mwait aren't formatted correctly in 64-bit mode. */
306 /*
307 * Input validation and massaging.
308 */
309 AssertPtr(pDis);
310 AssertPtrNull(pszBuf);
311 Assert(pszBuf || !cchBuf);
312 AssertPtrNull(pfnGetSymbol);
313 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
314 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
315 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
316 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
317 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
318
319 PCDISOPCODE const pOp = pDis->pCurInstr;
320
321 /*
322 * Output macros
323 */
324 char *pszDst = pszBuf;
325 size_t cchDst = cchBuf;
326 size_t cchOutput = 0;
327#define PUT_C(ch) \
328 do { \
329 cchOutput++; \
330 if (cchDst > 1) \
331 { \
332 cchDst--; \
333 *pszDst++ = (ch); \
334 } \
335 } while (0)
336#define PUT_STR(pszSrc, cchSrc) \
337 do { \
338 cchOutput += (cchSrc); \
339 if (cchDst > (cchSrc)) \
340 { \
341 memcpy(pszDst, (pszSrc), (cchSrc)); \
342 pszDst += (cchSrc); \
343 cchDst -= (cchSrc); \
344 } \
345 else if (cchDst > 1) \
346 { \
347 memcpy(pszDst, (pszSrc), cchDst - 1); \
348 pszDst += cchDst - 1; \
349 cchDst = 1; \
350 } \
351 } while (0)
352#define PUT_SZ(sz) \
353 PUT_STR((sz), sizeof(sz) - 1)
354#define PUT_SZ_STRICT(szStrict, szRelaxed) \
355 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
356#define PUT_PSZ(psz) \
357 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
358#define PUT_NUM(cch, fmt, num) \
359 do { \
360 cchOutput += (cch); \
361 if (cchDst > 1) \
362 { \
363 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
364 pszDst += cchTmp; \
365 cchDst -= cchTmp; \
366 Assert(cchTmp == (cch) || cchDst == 1); \
367 } \
368 } while (0)
369/** @todo add two flags for choosing between %X / %x and h / 0x. */
370#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
371#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
372#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
373#define PUT_NUM_64(num) PUT_NUM(18, "0%016RX64h", (uint64_t)(num))
374
375#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
376 do { \
377 if ((stype)(num) >= 0) \
378 { \
379 PUT_C('+'); \
380 PUT_NUM(cch, fmt, (utype)(num)); \
381 } \
382 else \
383 { \
384 PUT_C('-'); \
385 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
386 } \
387 } while (0)
388#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0%02xh", num, int8_t, uint8_t)
389#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0%04xh", num, int16_t, uint16_t)
390#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0%08xh", num, int32_t, uint32_t)
391#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0%016RX64h", num, int64_t, uint64_t)
392
393#define PUT_SYMBOL_TWO(a_rcSym, a_szStart, a_chEnd) \
394 do { \
395 if (RT_SUCCESS(a_rcSym)) \
396 { \
397 PUT_SZ(a_szStart); \
398 PUT_PSZ(szSymbol); \
399 if (off != 0) \
400 { \
401 if ((int8_t)off == off) \
402 PUT_NUM_S8(off); \
403 else if ((int16_t)off == off) \
404 PUT_NUM_S16(off); \
405 else if ((int32_t)off == off) \
406 PUT_NUM_S32(off); \
407 else \
408 PUT_NUM_S64(off); \
409 } \
410 PUT_C(a_chEnd); \
411 } \
412 } while (0)
413
414#define PUT_SYMBOL(a_uSeg, a_uAddr, a_szStart, a_chEnd) \
415 do { \
416 if (pfnGetSymbol) \
417 { \
418 int rcSym = pfnGetSymbol(pDis, a_uSeg, a_uAddr, szSymbol, sizeof(szSymbol), &off, pvUser); \
419 PUT_SYMBOL_TWO(rcSym, a_szStart, a_chEnd); \
420 } \
421 } while (0)
422
423
424 /*
425 * The address?
426 */
427 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
428 {
429#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
430 if (pDis->uInstrAddr >= _4G)
431 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
432#endif
433 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
434 PUT_C(' ');
435 }
436
437 /*
438 * The opcode bytes?
439 */
440 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
441 {
442 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
443 cchOutput += cchTmp;
444 if (cchDst > 1)
445 {
446 if (cchTmp <= cchDst)
447 {
448 cchDst -= cchTmp;
449 pszDst += cchTmp;
450 }
451 else
452 {
453 pszDst += cchDst - 1;
454 cchDst = 1;
455 }
456 }
457
458 /* Some padding to align the instruction. */
459 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
460 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
461 + 2;
462 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
463 PUT_STR(g_szSpaces, cchPadding);
464 }
465
466
467 /*
468 * Filter out invalid opcodes first as they need special
469 * treatment. UD2 is an exception and should be handled normally.
470 */
471 size_t const offInstruction = cchOutput;
472 if ( pOp->uOpcode == OP_INVALID
473 || ( pOp->uOpcode == OP_ILLUD2
474 && (pDis->arch.x86.fPrefix & DISPREFIX_LOCK)))
475 PUT_SZ("Illegal opcode");
476 else
477 {
478 /*
479 * Prefixes
480 */
481 if (pDis->arch.x86.fPrefix & DISPREFIX_LOCK)
482 PUT_SZ("lock ");
483 if (pDis->arch.x86.fPrefix & DISPREFIX_REP)
484 PUT_SZ("rep ");
485 else if(pDis->arch.x86.fPrefix & DISPREFIX_REPNE)
486 PUT_SZ("repne ");
487
488 /*
489 * Adjust the format string to the correct mnemonic
490 * or to avoid things the assembler cannot handle correctly.
491 */
492 char szTmpFmt[48];
493 const char *pszFmt = pOp->pszOpcode;
494 bool fIgnoresOpSize = false;
495 bool fMayNeedAddrSize = false;
496 switch (pOp->uOpcode)
497 {
498 case OP_JECXZ:
499 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "jcxz %Jb" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
500 break;
501 case OP_PUSHF:
502 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "pushfw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "pushfd" : "pushfq";
503 break;
504 case OP_POPF:
505 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "popfw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "popfd" : "popfq";
506 break;
507 case OP_PUSHA:
508 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "pushaw" : "pushad";
509 break;
510 case OP_POPA:
511 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "popaw" : "popad";
512 break;
513 case OP_INSB:
514 pszFmt = "insb";
515 fIgnoresOpSize = fMayNeedAddrSize = true;
516 break;
517 case OP_INSWD:
518 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "insw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "insd" : "insq";
519 fMayNeedAddrSize = true;
520 break;
521 case OP_OUTSB:
522 pszFmt = "outsb";
523 fIgnoresOpSize = fMayNeedAddrSize = true;
524 break;
525 case OP_OUTSWD:
526 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "outsw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "outsd" : "outsq";
527 fMayNeedAddrSize = true;
528 break;
529 case OP_MOVSB:
530 pszFmt = "movsb";
531 fIgnoresOpSize = fMayNeedAddrSize = true;
532 break;
533 case OP_MOVSWD:
534 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "movsw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "movsd" : "movsq";
535 fMayNeedAddrSize = true;
536 break;
537 case OP_CMPSB:
538 pszFmt = "cmpsb";
539 fIgnoresOpSize = fMayNeedAddrSize = true;
540 break;
541 case OP_CMPWD:
542 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "cmpsw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "cmpsd" : "cmpsq";
543 fMayNeedAddrSize = true;
544 break;
545 case OP_SCASB:
546 pszFmt = "scasb";
547 fIgnoresOpSize = fMayNeedAddrSize = true;
548 break;
549 case OP_SCASWD:
550 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "scasw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "scasd" : "scasq";
551 fMayNeedAddrSize = true;
552 break;
553 case OP_LODSB:
554 pszFmt = "lodsb";
555 fIgnoresOpSize = fMayNeedAddrSize = true;
556 break;
557 case OP_LODSWD:
558 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "lodsw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "lodsd" : "lodsq";
559 fMayNeedAddrSize = true;
560 break;
561 case OP_STOSB:
562 pszFmt = "stosb";
563 fIgnoresOpSize = fMayNeedAddrSize = true;
564 break;
565 case OP_STOSWD:
566 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "stosw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "stosd" : "stosq";
567 fMayNeedAddrSize = true;
568 break;
569 case OP_CBW:
570 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "cbw" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "cwde" : "cdqe";
571 break;
572 case OP_CWD:
573 pszFmt = pDis->arch.x86.uOpMode == DISCPUMODE_16BIT ? "cwd" : pDis->arch.x86.uOpMode == DISCPUMODE_32BIT ? "cdq" : "cqo";
574 break;
575 case OP_SHL:
576 Assert(pszFmt[3] == '/');
577 pszFmt += 4;
578 break;
579 case OP_XLAT:
580 pszFmt = "xlatb";
581 break;
582 case OP_INT3:
583 pszFmt = "int3";
584 break;
585
586 /*
587 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
588 */
589 case OP_NOP:
590 if (pDis->arch.x86.bOpCode == 0x90)
591 /* fine, fine */;
592 else if (pszFmt[sizeof("nop %Ev") - 1] == '/' && pszFmt[sizeof("nop %Ev")] == 'p')
593 pszFmt = "prefetch %Eb";
594 else if (pDis->arch.x86.bOpCode == 0x1f)
595 {
596 Assert(pDis->cbInstr >= 3);
597 PUT_SZ("db 00fh, 01fh,");
598 PUT_NUM_8(MAKE_MODRM(pDis->arch.x86.ModRM.Bits.Mod, pDis->arch.x86.ModRM.Bits.Reg, pDis->arch.x86.ModRM.Bits.Rm));
599 for (unsigned i = 3; i < pDis->cbInstr; i++)
600 {
601 PUT_C(',');
602 PUT_NUM_8(0x90); /// @todo fixme.
603 }
604 pszFmt = "";
605 }
606 break;
607
608 default:
609 /* ST(X) -> stX (floating point) */
610 if (*pszFmt == 'f' && strchr(pszFmt, '('))
611 {
612 char *pszFmtDst = szTmpFmt;
613 char ch;
614 do
615 {
616 ch = *pszFmt++;
617 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
618 {
619 *pszFmtDst++ = 's';
620 *pszFmtDst++ = 't';
621 pszFmt += 2;
622 ch = *pszFmt;
623 Assert(pszFmt[1] == ')');
624 pszFmt += 2;
625 *pszFmtDst++ = ch;
626 }
627 else
628 *pszFmtDst++ = ch;
629 } while (ch != '\0');
630 pszFmt = szTmpFmt;
631 }
632 if (strchr("#@&", *pszFmt))
633 {
634 const char *pszDelim = strchr(pszFmt, '/');
635 const char *pszSpace = (pszDelim ? strchr(pszDelim, ' ') : NULL);
636 if (pszDelim != NULL)
637 {
638 char *pszFmtDst = szTmpFmt;
639 if (pszSpace == NULL) pszSpace = strchr(pszDelim, 0);
640 if ( (*pszFmt == '#' && !pDis->arch.x86.bVexWFlag) /** @todo check this*/
641 || (*pszFmt == '@' && !VEXREG_IS256B(pDis->arch.x86.bVexDestReg))
642 || (*pszFmt == '&' && ( DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
643 || DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
644 || DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse)
645 || DISUSE_IS_EFFECTIVE_ADDR(pDis->Param4.fUse))))
646 {
647 strncpy(pszFmtDst, pszFmt + 1, pszDelim - pszFmt - 1);
648 pszFmtDst += pszDelim - pszFmt - 1;
649 }
650 else
651 {
652 strncpy(pszFmtDst, pszDelim + 1, pszSpace - pszDelim - 1);
653 pszFmtDst += pszSpace - pszDelim - 1;
654 }
655 strcpy (pszFmtDst, pszSpace);
656 pszFmt = szTmpFmt;
657 }
658 }
659 break;
660
661 /*
662 * Horrible hacks.
663 */
664 case OP_FLD:
665 if (pDis->arch.x86.bOpCode == 0xdb) /* m80fp workaround. */
666 *(int *)&pDis->Param1.arch.x86.fParam &= ~0x1f; /* make it pure OP_PARM_M */
667 break;
668 case OP_LAR: /* hack w -> v, probably not correct. */
669 *(int *)&pDis->Param2.arch.x86.fParam &= ~0x1f;
670 *(int *)&pDis->Param2.arch.x86.fParam |= OP_PARM_v;
671 break;
672 }
673
674 /*
675 * Add operand size and address prefixes for outsb, movsb, etc.
676 */
677 if (pDis->arch.x86.fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
678 {
679 if (fIgnoresOpSize && (pDis->arch.x86.fPrefix & DISPREFIX_OPSIZE) )
680 {
681 if (pDis->uCpuMode == DISCPUMODE_16BIT)
682 PUT_SZ("o32 ");
683 else
684 PUT_SZ("o16 ");
685 }
686 if (fMayNeedAddrSize && (pDis->arch.x86.fPrefix & DISPREFIX_ADDRSIZE) )
687 {
688 if (pDis->uCpuMode == DISCPUMODE_16BIT)
689 PUT_SZ("a32 ");
690 else
691 PUT_SZ("a16 ");
692 }
693 }
694
695 /*
696 * Formatting context and associated macros.
697 */
698 PCDISOPPARAM pParam = &pDis->Param1;
699 int iParam = 1;
700
701#define PUT_FAR() \
702 do { \
703 if ( OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_p \
704 && pOp->uOpcode != OP_LDS /* table bugs? */ \
705 && pOp->uOpcode != OP_LES \
706 && pOp->uOpcode != OP_LFS \
707 && pOp->uOpcode != OP_LGS \
708 && pOp->uOpcode != OP_LSS ) \
709 PUT_SZ("far "); \
710 } while (0)
711 /** @todo mov ah,ch ends up with a byte 'override'... - check if this wasn't fixed. */
712 /** @todo drop the word/dword/qword override when the src/dst is a register (except for movsx/movzx). */
713#define PUT_SIZE_OVERRIDE() \
714 do { \
715 switch (OP_PARM_VSUBTYPE(pParam->arch.x86.fParam)) \
716 { \
717 case OP_PARM_v: \
718 case OP_PARM_y: \
719 switch (pDis->arch.x86.uOpMode) \
720 { \
721 case DISCPUMODE_16BIT: if (OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) != OP_PARM_y) PUT_SZ("word "); break; \
722 case DISCPUMODE_32BIT: \
723 if (pDis->pCurInstr->uOpcode != OP_GATHER || pDis->arch.x86.bVexWFlag) { PUT_SZ("dword "); break; } \
724 RT_FALL_THRU(); \
725 case DISCPUMODE_64BIT: PUT_SZ("qword "); break; \
726 default: break; \
727 } \
728 break; \
729 case OP_PARM_b: PUT_SZ("byte "); break; \
730 case OP_PARM_w: \
731 if ( OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_W \
732 || OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_M) \
733 { \
734 if (VEXREG_IS256B(pDis->arch.x86.bVexDestReg)) PUT_SZ("dword "); \
735 else PUT_SZ("word "); \
736 } \
737 break; \
738 case OP_PARM_d: \
739 if ( OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_W \
740 || OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_M) \
741 { \
742 if (VEXREG_IS256B(pDis->arch.x86.bVexDestReg)) PUT_SZ("qword "); \
743 else PUT_SZ("dword "); \
744 } \
745 break; \
746 case OP_PARM_q: \
747 if ( OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_W \
748 || OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_M) \
749 { \
750 if (VEXREG_IS256B(pDis->arch.x86.bVexDestReg)) PUT_SZ("oword "); \
751 else PUT_SZ("qword "); \
752 } \
753 break; \
754 case OP_PARM_ps: \
755 case OP_PARM_pd: \
756 case OP_PARM_x: if (VEXREG_IS256B(pDis->arch.x86.bVexDestReg)) { PUT_SZ("yword "); break; } RT_FALL_THRU(); \
757 case OP_PARM_ss: \
758 case OP_PARM_sd: \
759 case OP_PARM_dq: PUT_SZ("oword "); break; \
760 case OP_PARM_qq: PUT_SZ("yword "); break; \
761 case OP_PARM_p: break; /* see PUT_FAR */ \
762 case OP_PARM_s: if (pParam->fUse & DISUSE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
763 case OP_PARM_z: break; \
764 case OP_PARM_NONE: \
765 if ( OP_PARM_VTYPE(pParam->arch.x86.fParam) == OP_PARM_M \
766 && ((pParam->fUse & DISUSE_REG_FP) || pOp->uOpcode == OP_FLD)) \
767 PUT_SZ("tword "); \
768 break; \
769 default: break; /*no pointer type specified/necessary*/ \
770 } \
771 } while (0)
772 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
773#define PUT_SEGMENT_OVERRIDE() \
774 do { \
775 if (pDis->arch.x86.fPrefix & DISPREFIX_SEG) \
776 PUT_STR(s_szSegPrefix[pDis->arch.x86.idxSegPrefix], 3); \
777 } while (0)
778
779
780 /*
781 * Segment prefixing for instructions that don't do memory access.
782 */
783 if ( (pDis->arch.x86.fPrefix & DISPREFIX_SEG)
784 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
785 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
786 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
787 {
788 PUT_STR(s_szSegPrefix[pDis->arch.x86.idxSegPrefix], 2);
789 PUT_C(' ');
790 }
791
792
793 /*
794 * The formatting loop.
795 */
796 RTINTPTR off;
797 char szSymbol[128];
798 char ch;
799 while ((ch = *pszFmt++) != '\0')
800 {
801 if (ch == '%')
802 {
803 ch = *pszFmt++;
804 switch (ch)
805 {
806 /*
807 * ModRM - Register only / VEX.vvvv.
808 */
809 case 'C': /* Control register (ParseModRM / UseModRM). */
810 case 'D': /* Debug register (ParseModRM / UseModRM). */
811 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
812 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
813 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
814 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
815 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
816 case 'H': /* The VEX.vvvv field of the VEX prefix selects a XMM/YMM register. */
817 case 'B': /* The VEX.vvvv field of the VEX prefix selects a general register (ParseVexDest). */
818 case 'L': /* The upper 4 bits of the 8-bit immediate selects a XMM/YMM register. */
819 {
820 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
821 Assert(!(pParam->fUse & (DISUSE_INDEX | DISUSE_SCALE) /* No SIB here... */));
822 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
823
824 size_t cchReg;
825 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
826 PUT_STR(pszReg, cchReg);
827 break;
828 }
829
830 /*
831 * ModRM - Register or memory.
832 */
833 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
834 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
835 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
836 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
837 case 'U': /* ModRM byte may only refer to a XMM/SSE register (ParseModRM / UseModRM). */
838 case 'M': /* ModRM byte may only refer to memory (ParseModRM / UseModRM). */
839 {
840 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
841
842 PUT_FAR();
843 uint32_t const fUse = pParam->fUse;
844 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
845 {
846 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
847 while the register variants deals with 16, 32 & 64 in the normal fashion. */
848 if ( pParam->arch.x86.fParam != OP_PARM_Ev
849 || pOp->uOpcode != OP_MOV
850 || ( pOp->fParam1 != OP_PARM_Sw
851 && pOp->fParam2 != OP_PARM_Sw))
852 PUT_SIZE_OVERRIDE();
853 PUT_C('[');
854 }
855 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
856 && (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)))
857 {
858 if ( (fUse & DISUSE_DISPLACEMENT8)
859 && !pParam->arch.x86.uDisp.i8)
860 PUT_SZ("byte ");
861 else if ( (fUse & DISUSE_DISPLACEMENT16)
862 && (int8_t)pParam->arch.x86.uDisp.i16 == (int16_t)pParam->arch.x86.uDisp.i16)
863 PUT_SZ("word ");
864 else if ( (fUse & DISUSE_DISPLACEMENT32)
865 && (int16_t)pParam->arch.x86.uDisp.i32 == (int32_t)pParam->arch.x86.uDisp.i32) //??
866 PUT_SZ("dword ");
867 else if ( (fUse & DISUSE_DISPLACEMENT64)
868 && (pDis->arch.x86.SIB.Bits.Base != 5 || pDis->arch.x86.ModRM.Bits.Mod != 0)
869 && (int32_t)pParam->arch.x86.uDisp.i64 == (int64_t)pParam->arch.x86.uDisp.i64) //??
870 PUT_SZ("qword ");
871 }
872 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
873 PUT_SEGMENT_OVERRIDE();
874
875 bool fBase = (fUse & DISUSE_BASE) /* When exactly is DISUSE_BASE supposed to be set? disasmModRMReg doesn't set it. */
876 || ( (fUse & ( DISUSE_REG_GEN8
877 | DISUSE_REG_GEN16
878 | DISUSE_REG_GEN32
879 | DISUSE_REG_GEN64
880 | DISUSE_REG_FP
881 | DISUSE_REG_MMX
882 | DISUSE_REG_XMM
883 | DISUSE_REG_YMM
884 | DISUSE_REG_CR
885 | DISUSE_REG_DBG
886 | DISUSE_REG_SEG
887 | DISUSE_REG_TEST ))
888 && !DISUSE_IS_EFFECTIVE_ADDR(fUse));
889 if (fBase)
890 {
891 size_t cchReg;
892 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
893 PUT_STR(pszReg, cchReg);
894 }
895
896 if (fUse & DISUSE_INDEX)
897 {
898 if (fBase)
899 PUT_C('+');
900
901 size_t cchReg;
902 const char *pszReg = disasmFormatYasmIndexReg(pDis, pParam, &cchReg);
903 PUT_STR(pszReg, cchReg);
904
905 if (fUse & DISUSE_SCALE)
906 {
907 PUT_C('*');
908 PUT_C('0' + pParam->arch.x86.uScale);
909 }
910 }
911 else
912 Assert(!(fUse & DISUSE_SCALE));
913
914 int64_t off2 = 0;
915 if (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32))
916 {
917 if (fUse & DISUSE_DISPLACEMENT8)
918 off2 = pParam->arch.x86.uDisp.i8;
919 else if (fUse & DISUSE_DISPLACEMENT16)
920 off2 = pParam->arch.x86.uDisp.i16;
921 else if (fUse & (DISUSE_DISPLACEMENT32 | DISUSE_RIPDISPLACEMENT32))
922 off2 = pParam->arch.x86.uDisp.i32;
923 else if (fUse & DISUSE_DISPLACEMENT64)
924 off2 = pParam->arch.x86.uDisp.i64;
925 else
926 {
927 AssertFailed();
928 off2 = 0;
929 }
930
931 int64_t off3 = off2;
932 if (fBase || (fUse & (DISUSE_INDEX | DISUSE_RIPDISPLACEMENT32)))
933 {
934 PUT_C(off3 >= 0 ? '+' : '-');
935 if (off3 < 0)
936 off3 = -off3;
937 }
938 if (fUse & DISUSE_DISPLACEMENT8)
939 PUT_NUM_8( off3);
940 else if (fUse & DISUSE_DISPLACEMENT16)
941 PUT_NUM_16(off3);
942 else if (fUse & DISUSE_DISPLACEMENT32)
943 PUT_NUM_32(off3);
944 else if (fUse & DISUSE_DISPLACEMENT64)
945 PUT_NUM_64(off3);
946 else
947 {
948 PUT_NUM_32(off3);
949 PUT_SZ(" wrt rip (");
950 off2 += pDis->uInstrAddr + pDis->cbInstr;
951 PUT_NUM_64(off2);
952 if (pfnGetSymbol)
953 PUT_SYMBOL((pDis->arch.x86.fPrefix & DISPREFIX_SEG)
954 ? DIS_FMT_SEL_FROM_REG(pDis->arch.x86.idxSegPrefix)
955 : DIS_FMT_SEL_FROM_REG(DISSELREG_DS),
956 pDis->arch.x86.uAddrMode == DISCPUMODE_64BIT
957 ? (uint64_t)off2
958 : pDis->arch.x86.uAddrMode == DISCPUMODE_32BIT
959 ? (uint32_t)off2
960 : (uint16_t)off2,
961 " = ",
962 ')');
963 else
964 PUT_C(')');
965 }
966 }
967
968 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
969 {
970 if (pfnGetSymbol && !fBase && !(fUse & (DISUSE_INDEX | DISUSE_RIPDISPLACEMENT32)) && off2 != 0)
971 PUT_SYMBOL((pDis->arch.x86.fPrefix & DISPREFIX_SEG)
972 ? DIS_FMT_SEL_FROM_REG(pDis->arch.x86.idxSegPrefix)
973 : DIS_FMT_SEL_FROM_REG(DISSELREG_DS),
974 pDis->arch.x86.uAddrMode == DISCPUMODE_64BIT
975 ? (uint64_t)off2
976 : pDis->arch.x86.uAddrMode == DISCPUMODE_32BIT
977 ? (uint32_t)off2
978 : (uint16_t)off2,
979 " (=",
980 ')');
981 PUT_C(']');
982 }
983 break;
984 }
985
986 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
987 AssertFailed();
988 break;
989
990 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
991 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
992 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
993 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
994 {
995 case DISUSE_IMMEDIATE8:
996 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
997 && ( (pOp->fParam1 >= OP_PARM_REG_GEN8_START && pOp->fParam1 <= OP_PARM_REG_GEN8_END)
998 || (pOp->fParam2 >= OP_PARM_REG_GEN8_START && pOp->fParam2 <= OP_PARM_REG_GEN8_END))
999 )
1000 PUT_SZ("strict byte ");
1001 PUT_NUM_8(pParam->uValue);
1002 break;
1003
1004 case DISUSE_IMMEDIATE16:
1005 if ( pDis->uCpuMode != pDis->arch.x86.uOpMode
1006 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
1007 && ( (int8_t)pParam->uValue == (int16_t)pParam->uValue
1008 || (pOp->fParam1 >= OP_PARM_REG_GEN16_START && pOp->fParam1 <= OP_PARM_REG_GEN16_END)
1009 || (pOp->fParam2 >= OP_PARM_REG_GEN16_START && pOp->fParam2 <= OP_PARM_REG_GEN16_END))
1010 )
1011 )
1012 {
1013 if (OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_b)
1014 PUT_SZ_STRICT("strict byte ", "byte ");
1015 else if ( OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_v
1016 || OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_z)
1017 PUT_SZ_STRICT("strict word ", "word ");
1018 }
1019 PUT_NUM_16(pParam->uValue);
1020 break;
1021
1022 case DISUSE_IMMEDIATE16_SX8:
1023 if ( !(pDis->arch.x86.fPrefix & DISPREFIX_OPSIZE)
1024 || pDis->pCurInstr->uOpcode != OP_PUSH)
1025 PUT_SZ_STRICT("strict byte ", "byte ");
1026 else
1027 PUT_SZ("word ");
1028 PUT_NUM_16(pParam->uValue);
1029 break;
1030
1031 case DISUSE_IMMEDIATE32:
1032 if ( pDis->arch.x86.uOpMode != (pDis->uCpuMode == DISCPUMODE_16BIT ? DISCPUMODE_16BIT : DISCPUMODE_32BIT) /* not perfect */
1033 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
1034 && ( (int8_t)pParam->uValue == (int32_t)pParam->uValue
1035 || (pOp->fParam1 >= OP_PARM_REG_GEN32_START && pOp->fParam1 <= OP_PARM_REG_GEN32_END)
1036 || (pOp->fParam2 >= OP_PARM_REG_GEN32_START && pOp->fParam2 <= OP_PARM_REG_GEN32_END))
1037 )
1038 )
1039 {
1040 if (OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_b)
1041 PUT_SZ_STRICT("strict byte ", "byte ");
1042 else if ( OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_v
1043 || OP_PARM_VSUBTYPE(pParam->arch.x86.fParam) == OP_PARM_z)
1044 PUT_SZ_STRICT("strict dword ", "dword ");
1045 }
1046 PUT_NUM_32(pParam->uValue);
1047 if (pDis->uCpuMode == DISCPUMODE_32BIT)
1048 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uValue, " (=", ')');
1049 break;
1050
1051 case DISUSE_IMMEDIATE32_SX8:
1052 if ( !(pDis->arch.x86.fPrefix & DISPREFIX_OPSIZE)
1053 || pDis->pCurInstr->uOpcode != OP_PUSH)
1054 PUT_SZ_STRICT("strict byte ", "byte ");
1055 else
1056 PUT_SZ("dword ");
1057 PUT_NUM_32(pParam->uValue);
1058 break;
1059
1060 case DISUSE_IMMEDIATE64_SX8:
1061 if ( !(pDis->arch.x86.fPrefix & DISPREFIX_OPSIZE)
1062 || pDis->pCurInstr->uOpcode != OP_PUSH)
1063 PUT_SZ_STRICT("strict byte ", "byte ");
1064 else
1065 PUT_SZ("qword ");
1066 PUT_NUM_64(pParam->uValue);
1067 break;
1068
1069 case DISUSE_IMMEDIATE64:
1070 PUT_NUM_64(pParam->uValue);
1071 break;
1072
1073 default:
1074 AssertFailed();
1075 break;
1076 }
1077 break;
1078
1079 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
1080 {
1081 int32_t offDisplacement;
1082 Assert(iParam == 1);
1083 bool fPrefix = (fFlags & DIS_FMT_FLAGS_STRICT)
1084 && pOp->uOpcode != OP_CALL
1085 && pOp->uOpcode != OP_LOOP
1086 && pOp->uOpcode != OP_LOOPE
1087 && pOp->uOpcode != OP_LOOPNE
1088 && pOp->uOpcode != OP_JECXZ;
1089 if (pOp->uOpcode == OP_CALL)
1090 fFlags &= ~DIS_FMT_FLAGS_RELATIVE_BRANCH;
1091
1092 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
1093 {
1094 if (fPrefix)
1095 PUT_SZ("short ");
1096 offDisplacement = (int8_t)pParam->uValue;
1097 Assert(*pszFmt == 'b'); pszFmt++;
1098
1099 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1100 PUT_NUM_S8(offDisplacement);
1101 }
1102 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
1103 {
1104 if (fPrefix)
1105 PUT_SZ("near ");
1106 offDisplacement = (int16_t)pParam->uValue;
1107 Assert(*pszFmt == 'v'); pszFmt++;
1108
1109 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1110 PUT_NUM_S16(offDisplacement);
1111 }
1112 else
1113 {
1114 if (fPrefix)
1115 PUT_SZ("near ");
1116 offDisplacement = (int32_t)pParam->uValue;
1117 Assert(pParam->fUse & (DISUSE_IMMEDIATE32_REL | DISUSE_IMMEDIATE64_REL));
1118 Assert(*pszFmt == 'v'); pszFmt++;
1119
1120 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1121 PUT_NUM_S32(offDisplacement);
1122 }
1123 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1124 PUT_SZ(" (");
1125
1126 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
1127 if (pDis->uCpuMode == DISCPUMODE_16BIT)
1128 PUT_NUM_16(uTrgAddr);
1129 else if (pDis->uCpuMode == DISCPUMODE_32BIT)
1130 PUT_NUM_32(uTrgAddr);
1131 else
1132 PUT_NUM_64(uTrgAddr);
1133
1134 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1135 {
1136 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " = ", ' ');
1137 PUT_C(')');
1138 }
1139 else
1140 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " (", ')');
1141 break;
1142 }
1143
1144 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
1145 {
1146 Assert(*pszFmt == 'p'); pszFmt++;
1147 PUT_FAR();
1148 PUT_SIZE_OVERRIDE();
1149 PUT_SEGMENT_OVERRIDE();
1150 off = 0;
1151 int rc = VERR_SYMBOL_NOT_FOUND;
1152 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
1153 {
1154 case DISUSE_IMMEDIATE_ADDR_16_16:
1155 PUT_NUM_16(pParam->uValue >> 16);
1156 PUT_C(':');
1157 PUT_NUM_16(pParam->uValue);
1158 if (pfnGetSymbol)
1159 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1160 break;
1161 case DISUSE_IMMEDIATE_ADDR_16_32:
1162 PUT_NUM_16(pParam->uValue >> 32);
1163 PUT_C(':');
1164 PUT_NUM_32(pParam->uValue);
1165 if (pfnGetSymbol)
1166 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1167 break;
1168 case DISUSE_DISPLACEMENT16:
1169 PUT_NUM_16(pParam->uValue);
1170 if (pfnGetSymbol)
1171 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1172 break;
1173 case DISUSE_DISPLACEMENT32:
1174 PUT_NUM_32(pParam->uValue);
1175 if (pfnGetSymbol)
1176 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1177 break;
1178 case DISUSE_DISPLACEMENT64:
1179 PUT_NUM_64(pParam->uValue);
1180 if (pfnGetSymbol)
1181 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint64_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1182 break;
1183 default:
1184 AssertFailed();
1185 break;
1186 }
1187
1188 PUT_SYMBOL_TWO(rc, " [", ']');
1189 break;
1190 }
1191
1192 case 'O': /* No ModRM byte (ParseImmAddr). */
1193 {
1194 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1195 PUT_FAR();
1196 PUT_SIZE_OVERRIDE();
1197 PUT_C('[');
1198 PUT_SEGMENT_OVERRIDE();
1199 off = 0;
1200 int rc = VERR_SYMBOL_NOT_FOUND;
1201 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
1202 {
1203 case DISUSE_IMMEDIATE_ADDR_16_16:
1204 PUT_NUM_16(pParam->uValue >> 16);
1205 PUT_C(':');
1206 PUT_NUM_16(pParam->uValue);
1207 if (pfnGetSymbol)
1208 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1209 break;
1210 case DISUSE_IMMEDIATE_ADDR_16_32:
1211 PUT_NUM_16(pParam->uValue >> 32);
1212 PUT_C(':');
1213 PUT_NUM_32(pParam->uValue);
1214 if (pfnGetSymbol)
1215 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1216 break;
1217 case DISUSE_DISPLACEMENT16:
1218 PUT_NUM_16(pParam->arch.x86.uDisp.i16);
1219 if (pfnGetSymbol)
1220 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->arch.x86.uDisp.u16, szSymbol, sizeof(szSymbol), &off, pvUser);
1221 break;
1222 case DISUSE_DISPLACEMENT32:
1223 PUT_NUM_32(pParam->arch.x86.uDisp.i32);
1224 if (pfnGetSymbol)
1225 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->arch.x86.uDisp.u32, szSymbol, sizeof(szSymbol), &off, pvUser);
1226 break;
1227 case DISUSE_DISPLACEMENT64:
1228 PUT_NUM_64(pParam->arch.x86.uDisp.i64);
1229 if (pfnGetSymbol)
1230 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->arch.x86.uDisp.u64, szSymbol, sizeof(szSymbol), &off, pvUser);
1231 break;
1232 default:
1233 AssertFailed();
1234 break;
1235 }
1236 PUT_C(']');
1237
1238 PUT_SYMBOL_TWO(rc, " (", ')');
1239 break;
1240 }
1241
1242 case 'X': /* DS:SI (ParseXb, ParseXv). */
1243 case 'Y': /* ES:DI (ParseYb, ParseYv). */
1244 {
1245 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1246 PUT_FAR();
1247 PUT_SIZE_OVERRIDE();
1248 PUT_C('[');
1249 if (pParam->fUse & DISUSE_POINTER_DS_BASED)
1250 PUT_SZ("ds:");
1251 else
1252 PUT_SZ("es:");
1253
1254 size_t cchReg;
1255 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1256 PUT_STR(pszReg, cchReg);
1257 PUT_C(']');
1258 break;
1259 }
1260
1261 case 'e': /* Register based on operand size (e.g. %eAX, %eAH) (ParseFixedReg). */
1262 {
1263 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2]));
1264 pszFmt += 2;
1265 size_t cchReg;
1266 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1267 PUT_STR(pszReg, cchReg);
1268 break;
1269 }
1270
1271 default:
1272 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
1273 break;
1274 }
1275 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
1276 }
1277 else
1278 {
1279 PUT_C(ch);
1280 if (ch == ',')
1281 {
1282 Assert(*pszFmt != ' ');
1283 PUT_C(' ');
1284 switch (++iParam)
1285 {
1286 case 2: pParam = &pDis->Param2; break;
1287 case 3: pParam = &pDis->Param3; break;
1288 case 4: pParam = &pDis->Param4; break;
1289 default: pParam = NULL; break;
1290 }
1291 }
1292 }
1293 } /* while more to format */
1294 }
1295
1296 /*
1297 * Any additional output to the right of the instruction?
1298 */
1299 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1300 {
1301 /* some up front padding. */
1302 size_t cchPadding = cchOutput - offInstruction;
1303 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
1304 PUT_STR(g_szSpaces, cchPadding);
1305
1306 /* comment? */
1307 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1308 PUT_SZ(";");
1309
1310 /*
1311 * The address?
1312 */
1313 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
1314 {
1315 PUT_C(' ');
1316#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
1317 if (pDis->uInstrAddr >= _4G)
1318 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
1319#endif
1320 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
1321 }
1322
1323 /*
1324 * Opcode bytes?
1325 */
1326 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
1327 {
1328 PUT_C(' ');
1329 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
1330 cchOutput += cchTmp;
1331 if (cchTmp >= cchDst)
1332 cchTmp = cchDst - (cchDst != 0);
1333 cchDst -= cchTmp;
1334 pszDst += cchTmp;
1335 }
1336 }
1337
1338 /*
1339 * Terminate it - on overflow we'll have reserved one byte for this.
1340 */
1341 if (cchDst > 0)
1342 *pszDst = '\0';
1343 else
1344 Assert(!cchBuf);
1345
1346 /* clean up macros */
1347#undef PUT_PSZ
1348#undef PUT_SZ
1349#undef PUT_STR
1350#undef PUT_C
1351 return cchOutput;
1352}
1353
1354
1355/**
1356 * Formats the current instruction in Yasm (/ Nasm) style.
1357 *
1358 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
1359 *
1360 *
1361 * @returns The number of output characters. If this is >= cchBuf, then the content
1362 * of pszBuf will be truncated.
1363 * @param pDis Pointer to the disassembler state.
1364 * @param pszBuf The output buffer.
1365 * @param cchBuf The size of the output buffer.
1366 */
1367DISDECL(size_t) DISFormatYasm(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
1368{
1369 return DISFormatYasmEx(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
1370}
1371
1372
1373/**
1374 * Checks if the encoding of the given disassembled instruction is something we
1375 * can never get YASM to produce.
1376 *
1377 * @returns true if it's odd, false if it isn't.
1378 * @param pDis The disassembler output. The byte fetcher callback will
1379 * be used if present as we might need to fetch opcode
1380 * bytes.
1381 */
1382DISDECL(bool) DISFormatYasmIsOddEncoding(PDISSTATE pDis)
1383{
1384 /*
1385 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
1386 */
1387 if ( pDis->arch.x86.uAddrMode != DISCPUMODE_16BIT /// @todo correct?
1388 && pDis->arch.x86.ModRM.Bits.Rm == 4
1389 && pDis->arch.x86.ModRM.Bits.Mod != 3)
1390 {
1391 /* No scaled index SIB (index=4), except for ESP. */
1392 if ( pDis->arch.x86.SIB.Bits.Index == 4
1393 && pDis->arch.x86.SIB.Bits.Base != 4)
1394 return true;
1395
1396 /* EBP + displacement */
1397 if ( pDis->arch.x86.ModRM.Bits.Mod != 0
1398 && pDis->arch.x86.SIB.Bits.Base == 5
1399 && pDis->arch.x86.SIB.Bits.Scale == 0)
1400 return true;
1401 }
1402
1403 /*
1404 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1405 */
1406 if ( pDis->pCurInstr->uOpcode == OP_SHL
1407 && pDis->arch.x86.ModRM.Bits.Reg == 6)
1408 return true;
1409
1410 /*
1411 * Check for multiple prefixes of the same kind.
1412 */
1413 uint8_t off1stSeg = UINT8_MAX;
1414 uint8_t offOpSize = UINT8_MAX;
1415 uint8_t offAddrSize = UINT8_MAX;
1416 uint32_t fPrefixes = 0;
1417 for (uint32_t offOpcode = 0; offOpcode < RT_ELEMENTS(pDis->u.abInstr); offOpcode++)
1418 {
1419 uint32_t f;
1420 switch (pDis->u.abInstr[offOpcode])
1421 {
1422 case 0xf0:
1423 f = DISPREFIX_LOCK;
1424 break;
1425
1426 case 0xf2:
1427 case 0xf3:
1428 f = DISPREFIX_REP; /* yes, both */
1429 break;
1430
1431 case 0x2e:
1432 case 0x3e:
1433 case 0x26:
1434 case 0x36:
1435 case 0x64:
1436 case 0x65:
1437 if (off1stSeg == UINT8_MAX)
1438 off1stSeg = offOpcode;
1439 f = DISPREFIX_SEG;
1440 break;
1441
1442 case 0x66:
1443 if (offOpSize == UINT8_MAX)
1444 offOpSize = offOpcode;
1445 f = DISPREFIX_OPSIZE;
1446 break;
1447
1448 case 0x67:
1449 if (offAddrSize == UINT8_MAX)
1450 offAddrSize = offOpcode;
1451 f = DISPREFIX_ADDRSIZE;
1452 break;
1453
1454 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1455 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1456 f = pDis->uCpuMode == DISCPUMODE_64BIT ? DISPREFIX_REX : 0;
1457 break;
1458
1459 default:
1460 f = 0;
1461 break;
1462 }
1463 if (!f)
1464 break; /* done */
1465 if (fPrefixes & f)
1466 return true;
1467 fPrefixes |= f;
1468 }
1469
1470 /* segment overrides are fun */
1471 if (fPrefixes & DISPREFIX_SEG)
1472 {
1473 /* no effective address which it may apply to. */
1474 Assert((pDis->arch.x86.fPrefix & DISPREFIX_SEG) || pDis->uCpuMode == DISCPUMODE_64BIT);
1475 if ( !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
1476 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
1477 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
1478 return true;
1479
1480 /* Yasm puts the segment prefixes before the operand prefix with no
1481 way of overriding it. */
1482 if (offOpSize < off1stSeg)
1483 return true;
1484 }
1485
1486 /* fixed register + addr override doesn't go down all that well. */
1487 if (fPrefixes & DISPREFIX_ADDRSIZE)
1488 {
1489 Assert(pDis->arch.x86.fPrefix & DISPREFIX_ADDRSIZE);
1490 if ( pDis->pCurInstr->fParam3 == OP_PARM_NONE
1491 && pDis->pCurInstr->fParam2 == OP_PARM_NONE
1492 && ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1493 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END))
1494 return true;
1495 }
1496
1497 /* Almost all prefixes are bad for jumps. */
1498 if (fPrefixes)
1499 {
1500 switch (pDis->pCurInstr->uOpcode)
1501 {
1502 /* nop w/ prefix(es). */
1503 case OP_NOP:
1504 return true;
1505
1506 case OP_JMP:
1507 if ( pDis->pCurInstr->fParam1 != OP_PARM_Jb
1508 && pDis->pCurInstr->fParam1 != OP_PARM_Jv)
1509 break;
1510 RT_FALL_THRU();
1511 case OP_JO:
1512 case OP_JNO:
1513 case OP_JC:
1514 case OP_JNC:
1515 case OP_JE:
1516 case OP_JNE:
1517 case OP_JBE:
1518 case OP_JNBE:
1519 case OP_JS:
1520 case OP_JNS:
1521 case OP_JP:
1522 case OP_JNP:
1523 case OP_JL:
1524 case OP_JNL:
1525 case OP_JLE:
1526 case OP_JNLE:
1527 /** @todo branch hinting 0x2e/0x3e... */
1528 return true;
1529 }
1530
1531 }
1532
1533 /* All but the segment prefix is bad news for push/pop. */
1534 if (fPrefixes & ~DISPREFIX_SEG)
1535 {
1536 switch (pDis->pCurInstr->uOpcode)
1537 {
1538 case OP_POP:
1539 case OP_PUSH:
1540 if ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_SEG_START
1541 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_SEG_END)
1542 return true;
1543 if ( (fPrefixes & ~DISPREFIX_OPSIZE)
1544 && pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1545 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END)
1546 return true;
1547 break;
1548
1549 case OP_POPA:
1550 case OP_POPF:
1551 case OP_PUSHA:
1552 case OP_PUSHF:
1553 if (fPrefixes & ~DISPREFIX_OPSIZE)
1554 return true;
1555 break;
1556 }
1557 }
1558
1559 /* Implicit 8-bit register instructions doesn't mix with operand size. */
1560 if ( (fPrefixes & DISPREFIX_OPSIZE)
1561 && ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1562 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1563 || ( pDis->pCurInstr->fParam2 == OP_PARM_Gb /* r8 */
1564 && pDis->pCurInstr->fParam1 == OP_PARM_Eb /* r8/mem8 */))
1565 )
1566 {
1567 switch (pDis->pCurInstr->uOpcode)
1568 {
1569 case OP_ADD:
1570 case OP_OR:
1571 case OP_ADC:
1572 case OP_SBB:
1573 case OP_AND:
1574 case OP_SUB:
1575 case OP_XOR:
1576 case OP_CMP:
1577 return true;
1578 default:
1579 break;
1580 }
1581 }
1582
1583 /* Instructions taking no address or operand which thus may be annoyingly
1584 difficult to format for yasm. */
1585 if (fPrefixes)
1586 {
1587 switch (pDis->pCurInstr->uOpcode)
1588 {
1589 case OP_STI:
1590 case OP_STC:
1591 case OP_CLI:
1592 case OP_CLD:
1593 case OP_CLC:
1594 case OP_INT:
1595 case OP_INT3:
1596 case OP_INTO:
1597 case OP_HLT:
1598 /** @todo Many more to can be added here. */
1599 return true;
1600 default:
1601 break;
1602 }
1603 }
1604
1605 /* FPU and other instructions that ignores operand size override. */
1606 if (fPrefixes & DISPREFIX_OPSIZE)
1607 {
1608 switch (pDis->pCurInstr->uOpcode)
1609 {
1610 /* FPU: */
1611 case OP_FIADD:
1612 case OP_FIMUL:
1613 case OP_FISUB:
1614 case OP_FISUBR:
1615 case OP_FIDIV:
1616 case OP_FIDIVR:
1617 /** @todo there are many more. */
1618 return true;
1619
1620 case OP_MOV:
1621 /** @todo could be that we're not disassembling these correctly. */
1622 if (pDis->pCurInstr->fParam1 == OP_PARM_Sw)
1623 return true;
1624 /** @todo what about the other way? */
1625 break;
1626
1627 default:
1628 break;
1629 }
1630 }
1631
1632
1633 /*
1634 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1635 *
1636 * For example:
1637 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1638 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1639 */
1640 if (pDis->arch.x86.ModRM.Bits.Mod == 3 /* reg,reg */)
1641 {
1642 switch (pDis->pCurInstr->uOpcode)
1643 {
1644 case OP_ADD:
1645 case OP_OR:
1646 case OP_ADC:
1647 case OP_SBB:
1648 case OP_AND:
1649 case OP_SUB:
1650 case OP_XOR:
1651 case OP_CMP:
1652 if ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1653 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1654 || ( pDis->pCurInstr->fParam1 == OP_PARM_Gv /* rX */
1655 && pDis->pCurInstr->fParam2 == OP_PARM_Ev /* rX/memX */))
1656 return true;
1657
1658 /* 82 (see table A-6). */
1659 if (pDis->arch.x86.bOpCode == 0x82)
1660 return true;
1661 break;
1662
1663 /* ff /0, fe /0, ff /1, fe /0 */
1664 case OP_DEC:
1665 case OP_INC:
1666 return true;
1667
1668 case OP_POP:
1669 case OP_PUSH:
1670 Assert(pDis->arch.x86.bOpCode == 0x8f);
1671 return true;
1672
1673 case OP_MOV:
1674 if ( pDis->arch.x86.bOpCode == 0x8a
1675 || pDis->arch.x86.bOpCode == 0x8b)
1676 return true;
1677 break;
1678
1679 default:
1680 break;
1681 }
1682 }
1683
1684 /* shl eax,1 will be assembled to the form without the immediate byte. */
1685 if ( pDis->pCurInstr->fParam2 == OP_PARM_Ib
1686 && (uint8_t)pDis->Param2.uValue == 1)
1687 {
1688 switch (pDis->pCurInstr->uOpcode)
1689 {
1690 case OP_SHL:
1691 case OP_SHR:
1692 case OP_SAR:
1693 case OP_RCL:
1694 case OP_RCR:
1695 case OP_ROL:
1696 case OP_ROR:
1697 return true;
1698 }
1699 }
1700
1701 /* And some more - see table A-6. */
1702 if (pDis->arch.x86.bOpCode == 0x82)
1703 {
1704 switch (pDis->pCurInstr->uOpcode)
1705 {
1706 case OP_ADD:
1707 case OP_OR:
1708 case OP_ADC:
1709 case OP_SBB:
1710 case OP_AND:
1711 case OP_SUB:
1712 case OP_XOR:
1713 case OP_CMP:
1714 return true;
1715 break;
1716 }
1717 }
1718
1719
1720 /* check for REX.X = 1 without SIB. */
1721
1722 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1723 says (intel doesn't appear to care). */
1724 switch (pDis->pCurInstr->uOpcode)
1725 {
1726 case OP_SETO:
1727 case OP_SETNO:
1728 case OP_SETC:
1729 case OP_SETNC:
1730 case OP_SETE:
1731 case OP_SETNE:
1732 case OP_SETBE:
1733 case OP_SETNBE:
1734 case OP_SETS:
1735 case OP_SETNS:
1736 case OP_SETP:
1737 case OP_SETNP:
1738 case OP_SETL:
1739 case OP_SETNL:
1740 case OP_SETLE:
1741 case OP_SETNLE:
1742 AssertMsg(pDis->arch.x86.bOpCode >= 0x90 && pDis->arch.x86.bOpCode <= 0x9f, ("%#x\n", pDis->arch.x86.bOpCode));
1743 if (pDis->arch.x86.ModRM.Bits.Reg != 2)
1744 return true;
1745 break;
1746 }
1747
1748 /*
1749 * The MOVZX reg32,mem16 instruction without an operand size prefix
1750 * doesn't quite make sense...
1751 */
1752 if ( pDis->pCurInstr->uOpcode == OP_MOVZX
1753 && pDis->arch.x86.bOpCode == 0xB7
1754 && (pDis->uCpuMode == DISCPUMODE_16BIT) != !!(fPrefixes & DISPREFIX_OPSIZE))
1755 return true;
1756
1757 return false;
1758}
1759
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette