VirtualBox

source: vbox/trunk/src/VBox/Disassembler/testcase/tstDisasm-2.cpp@ 41781

Last change on this file since 41781 was 41781, checked in by vboxsync, 13 years ago

DIS: Prefetch instruction bytes before starting to disassemble, inline all fetchers. Poison the state a bit in strict builds.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 20.9 KB
Line 
1/* $Id: tstDisasm-2.cpp 41781 2012-06-16 19:02:30Z vboxsync $ */
2/** @file
3 * Testcase - Generic Disassembler Tool.
4 */
5
6/*
7 * Copyright (C) 2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#include <VBox/dis.h>
22#include <VBox/err.h>
23#include <iprt/alloc.h>
24#include <iprt/assert.h>
25#include <iprt/initterm.h>
26#include <iprt/getopt.h>
27#include <iprt/file.h>
28#include <iprt/path.h>
29#include <iprt/stream.h>
30#include <iprt/string.h>
31#include <iprt/ctype.h>
32
33
34/*******************************************************************************
35* Structures and Typedefs *
36*******************************************************************************/
/**
 * The assembly output styles.
 */
typedef enum
{
    /** Print the disassembler's own text as-is. */
    kAsmStyle_Default,
    /** Yasm compatible output. */
    kAsmStyle_yasm,
    /** Masm output (the formatter only prints a "not implemented" line). */
    kAsmStyle_masm,
    /** Gas output (no formatter is wired up for this value in this file). */
    kAsmStyle_gas,
    /** Invalid / end marker. */
    kAsmStyle_invalid
} ASMSTYLE;

/**
 * How undefined opcodes are to be treated.
 */
typedef enum
{
    /** An undefined opcode is reported as an error. */
    kUndefOp_Fail,
    /** A valid instruction is reported as an error. */
    kUndefOp_All,
    /** Undefined opcodes are emitted as "db" byte lists. */
    kUndefOp_DefineByte,
    /** End marker. */
    kUndefOp_End
} UNDEFOPHANDLING;
39
40typedef struct MYDISSTATE
41{
42 DISCPUSTATE Cpu;
43 uint64_t uAddress; /**< The current instruction address. */
44 uint8_t *pbInstr; /**< The current instruction (pointer). */
45 uint32_t cbInstr; /**< The size of the current instruction. */
46 bool fUndefOp; /**< Whether the current instruction is really an undefined opcode.*/
47 UNDEFOPHANDLING enmUndefOp; /**< How to treat undefined opcodes. */
48 int rc; /**< Set if we hit EOF. */
49 size_t cbLeft; /**< The number of bytes left. (read) */
50 uint8_t *pbNext; /**< The next byte. (read) */
51 uint64_t uNextAddr; /**< The address of the next byte. (read) */
52 char szLine[256]; /**< The disassembler text output. */
53} MYDISSTATE;
54typedef MYDISSTATE *PMYDISSTATE;
55
56
57
58/**
59 * Default style.
60 *
61 * @param pState The disassembler state.
62 */
63static void MyDisasDefaultFormatter(PMYDISSTATE pState)
64{
65 RTPrintf("%s", pState->szLine);
66}
67
68
69/**
70 * Yasm style.
71 *
72 * @param pState The disassembler state.
73 */
74static void MyDisasYasmFormatter(PMYDISSTATE pState)
75{
76 char szTmp[256];
77#if 0
78 /* a very quick hack. */
79 strcpy(szTmp, RTStrStripL(strchr(pState->szLine, ':') + 1));
80
81 char *psz = strrchr(szTmp, '[');
82 *psz = '\0';
83 RTStrStripR(szTmp);
84
85 psz = strstr(szTmp, " ptr ");
86 if (psz)
87 memset(psz, ' ', 5);
88
89 char *pszEnd = strchr(szTmp, '\0');
90 while (pszEnd - &szTmp[0] < 71)
91 *pszEnd++ = ' ';
92 *pszEnd = '\0';
93
94#else
95 size_t cch = DISFormatYasmEx(&pState->Cpu, szTmp, sizeof(szTmp),
96 DIS_FMT_FLAGS_STRICT | DIS_FMT_FLAGS_ADDR_RIGHT | DIS_FMT_FLAGS_ADDR_COMMENT
97 | DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_BYTES_COMMENT | DIS_FMT_FLAGS_BYTES_SPACED,
98 NULL, NULL);
99 Assert(cch < sizeof(szTmp));
100 while (cch < 71)
101 szTmp[cch++] = ' ';
102 szTmp[cch] = '\0';
103#endif
104
105 RTPrintf(" %s ; %s", szTmp, pState->szLine);
106}
107
108
109/**
110 * Masm style.
111 *
112 * @param pState The disassembler state.
113 */
114static void MyDisasMasmFormatter(PMYDISSTATE pState)
115{
116 RTPrintf("masm not implemented: %s", pState->szLine);
117}
118
119
120/**
121 * This is a temporary workaround for catching a few illegal opcodes
122 * that the disassembler is currently letting thru, just enough to make
123 * the assemblers happy.
124 *
125 * We're too close to a release to dare mess with these things now as
126 * they may consequences for performance and let alone introduce bugs.
127 *
128 * @returns true if it's valid. false if it isn't.
129 *
130 * @param pCpu The disassembler output.
131 */
132static bool MyDisasIsValidInstruction(DISCPUSTATE const *pCpu)
133{
134 switch (pCpu->pCurInstr->uOpcode)
135 {
136 /* These doesn't take memory operands. */
137 case OP_MOV_CR:
138 case OP_MOV_DR:
139 case OP_MOV_TR:
140 if (pCpu->ModRM.Bits.Mod != 3)
141 return false;
142 break;
143
144 /* The 0x8f /0 variant of this instruction doesn't get its /r value verified. */
145 case OP_POP:
146 if ( pCpu->bOpCode == 0x8f
147 && pCpu->ModRM.Bits.Reg != 0)
148 return false;
149 break;
150
151 /* The 0xc6 /0 and 0xc7 /0 variants of this instruction don't get their /r values verified. */
152 case OP_MOV:
153 if ( ( pCpu->bOpCode == 0xc6
154 || pCpu->bOpCode == 0xc7)
155 && pCpu->ModRM.Bits.Reg != 0)
156 return false;
157 break;
158
159 default:
160 break;
161 }
162
163 return true;
164}
165
166
167/**
168 * @interface_method_impl{FNDISREADBYTES}
169 */
170static DECLCALLBACK(int) MyDisasInstrRead(PDISCPUSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
171{
172 PMYDISSTATE pState = (PMYDISSTATE)pDis;
173 RTUINTPTR uSrcAddr = pState->Cpu.uInstrAddr + offInstr;
174 if (RT_LIKELY( pState->uNextAddr == uSrcAddr
175 && pState->cbLeft >= cbMinRead))
176 {
177 /*
178 * Straight forward reading.
179 */
180 //size_t cbToRead = cbMaxRead;
181 size_t cbToRead = cbMinRead;
182 memcpy(&pState->Cpu.abInstr[offInstr], pState->pbNext, cbToRead);
183 pState->Cpu.cbCachedInstr = offInstr + cbToRead;
184 pState->pbNext += cbToRead;
185 pState->cbLeft -= cbToRead;
186 pState->uNextAddr += cbToRead;
187 return VINF_SUCCESS;
188 }
189
190 if (pState->uNextAddr == uSrcAddr)
191 {
192 /*
193 * Reading too much.
194 */
195 if (pState->cbLeft > 0)
196 {
197 memcpy(&pState->Cpu.abInstr[offInstr], pState->pbNext, pState->cbLeft);
198 offInstr += (uint8_t)pState->cbLeft;
199 cbMinRead -= (uint8_t)pState->cbLeft;
200 pState->pbNext += pState->cbLeft;
201 pState->uNextAddr += pState->cbLeft;
202 pState->cbLeft = 0;
203 }
204 memset(&pState->Cpu.abInstr[offInstr], 0xcc, cbMinRead);
205 pState->rc = VERR_EOF;
206 }
207 else
208 {
209 /*
210 * Non-sequential read, that's an error.
211 */
212 RTStrmPrintf(g_pStdErr, "Reading before current instruction!\n");
213 memset(&pState->Cpu.abInstr[offInstr], 0x90, cbMinRead);
214 pState->rc = VERR_INTERNAL_ERROR;
215 }
216 pState->Cpu.cbCachedInstr = offInstr + cbMinRead;
217 return pState->rc;
218}
219
220
221/**
222 * Disassembles a block of memory.
223 *
224 * @returns VBox status code.
225 * @param argv0 Program name (for errors and warnings).
226 * @param enmCpuMode The cpu mode to disassemble in.
227 * @param uAddress The address we're starting to disassemble at.
228 * @param uHighlightAddr The address of the instruction that should be
229 * highlighted. Pass UINT64_MAX to keep quiet.
230 * @param pbFile Where to start disassemble.
231 * @param cbFile How much to disassemble.
232 * @param enmStyle The assembly output style.
233 * @param fListing Whether to print in a listing like mode.
234 * @param enmUndefOp How to deal with undefined opcodes.
235 */
236static int MyDisasmBlock(const char *argv0, DISCPUMODE enmCpuMode, uint64_t uAddress,
237 uint64_t uHighlightAddr, uint8_t *pbFile, size_t cbFile,
238 ASMSTYLE enmStyle, bool fListing, UNDEFOPHANDLING enmUndefOp)
239{
240 /*
241 * Initialize the CPU context.
242 */
243 MYDISSTATE State;
244 State.uAddress = uAddress;
245 State.pbInstr = pbFile;
246 State.cbInstr = 0;
247 State.enmUndefOp = enmUndefOp;
248 State.rc = VINF_SUCCESS;
249 State.cbLeft = cbFile;
250 State.pbNext = pbFile;
251 State.uNextAddr = uAddress;
252
253 void (*pfnFormatter)(PMYDISSTATE pState);
254 switch (enmStyle)
255 {
256 case kAsmStyle_Default:
257 pfnFormatter = MyDisasDefaultFormatter;
258 break;
259
260 case kAsmStyle_yasm:
261 RTPrintf(" BITS %d\n", enmCpuMode == DISCPUMODE_16BIT ? 16 : enmCpuMode == DISCPUMODE_32BIT ? 32 : 64);
262 pfnFormatter = MyDisasYasmFormatter;
263 break;
264
265 case kAsmStyle_masm:
266 pfnFormatter = MyDisasMasmFormatter;
267 break;
268
269 default:
270 AssertFailedReturn(VERR_INTERNAL_ERROR);
271 }
272
273 /*
274 * The loop.
275 */
276 int rcRet = VINF_SUCCESS;
277 while (State.cbLeft > 0)
278 {
279 /*
280 * Disassemble it.
281 */
282 State.cbInstr = 0;
283 State.cbLeft += State.pbNext - State.pbInstr;
284 State.uNextAddr = State.uAddress;
285 State.pbNext = State.pbInstr;
286
287 int rc = DISInstrToStrWithReader(State.uAddress, enmCpuMode, MyDisasInstrRead, &State,
288 &State.Cpu, &State.cbInstr, State.szLine, sizeof(State.szLine));
289 if ( RT_SUCCESS(rc)
290 || ( ( rc == VERR_DIS_INVALID_OPCODE
291 || rc == VERR_DIS_GEN_FAILURE)
292 && State.enmUndefOp == kUndefOp_DefineByte))
293 {
294 State.fUndefOp = rc == VERR_DIS_INVALID_OPCODE
295 || rc == VERR_DIS_GEN_FAILURE
296 || State.Cpu.pCurInstr->uOpcode == OP_INVALID
297 || State.Cpu.pCurInstr->uOpcode == OP_ILLUD2
298 || ( State.enmUndefOp == kUndefOp_DefineByte
299 && !MyDisasIsValidInstruction(&State.Cpu));
300 if (State.fUndefOp && State.enmUndefOp == kUndefOp_DefineByte)
301 {
302 if (!State.cbInstr)
303 {
304 State.Cpu.abInstr[0] = 0;
305 State.Cpu.pfnReadBytes(&State.Cpu, 0, 1, 1);
306 State.cbInstr = 1;
307 }
308 RTPrintf(" db");
309 for (unsigned off = 0; off < State.cbInstr; off++)
310 RTPrintf(off ? ", %03xh" : " %03xh", State.Cpu.abInstr[off]);
311 RTPrintf(" ; %s\n", State.szLine);
312 }
313 else if (!State.fUndefOp && State.enmUndefOp == kUndefOp_All)
314 {
315 RTPrintf("%s: error at %#RX64: unexpected valid instruction (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->uOpcode);
316 pfnFormatter(&State);
317 rcRet = VERR_GENERAL_FAILURE;
318 }
319 else if (State.fUndefOp && State.enmUndefOp == kUndefOp_Fail)
320 {
321 RTPrintf("%s: error at %#RX64: undefined opcode (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->uOpcode);
322 pfnFormatter(&State);
323 rcRet = VERR_GENERAL_FAILURE;
324 }
325 else
326 {
327 /* Use db for odd encodings that we can't make the assembler use. */
328 if ( State.enmUndefOp == kUndefOp_DefineByte
329 && DISFormatYasmIsOddEncoding(&State.Cpu))
330 {
331 RTPrintf(" db");
332 for (unsigned off = 0; off < State.cbInstr; off++)
333 RTPrintf(off ? ", %03xh" : " %03xh", State.Cpu.abInstr[off]);
334 RTPrintf(" ; ");
335 }
336
337 pfnFormatter(&State);
338 }
339 }
340 else
341 {
342 State.cbInstr = State.pbNext - State.pbInstr;
343 if (!State.cbLeft)
344 RTPrintf("%s: error at %#RX64: read beyond the end (%Rrc)\n", argv0, State.uAddress, rc);
345 else if (State.cbInstr)
346 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d\n", argv0, State.uAddress, rc, State.cbInstr);
347 else
348 {
349 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d!\n", argv0, State.uAddress, rc, State.cbInstr);
350 if (rcRet == VINF_SUCCESS)
351 rcRet = rc;
352 break;
353 }
354 }
355
356 /* Highlight this instruction? */
357 if (uHighlightAddr - State.uAddress < State.cbInstr)
358 RTPrintf("; ^^^^^^^^^^^^^^^^^^^^^\n");
359
360 /* next */
361 State.uAddress += State.cbInstr;
362 State.pbInstr += State.cbInstr;
363 }
364
365 return rcRet;
366}
367
368/**
369 * Converts a hex char to a number.
370 *
371 * @returns 0..15 on success, -1 on failure.
372 * @param ch The character.
373 */
374static int HexDigitToNum(char ch)
375{
376 switch (ch)
377 {
378 case '0': return 0;
379 case '1': return 1;
380 case '2': return 2;
381 case '3': return 3;
382 case '4': return 4;
383 case '5': return 5;
384 case '6': return 6;
385 case '7': return 7;
386 case '8': return 8;
387 case '9': return 9;
388 case 'A':
389 case 'a': return 0xa;
390 case 'B':
391 case 'b': return 0xb;
392 case 'C':
393 case 'c': return 0xc;
394 case 'D':
395 case 'd': return 0xd;
396 case 'E':
397 case 'e': return 0xe;
398 case 'F':
399 case 'f': return 0xf;
400 default:
401 RTPrintf("error: Invalid hex digig '%c'\n", ch);
402 return -1;
403 }
404}
405
406/**
407 * Prints usage info.
408 *
409 * @returns 1.
410 * @param argv0 The program name.
411 */
412static int Usage(const char *argv0)
413{
414 RTStrmPrintf(g_pStdErr,
415"usage: %s [options] <file1> [file2..fileN]\n"
416" or: %s [options] <-x|--hex-bytes> <hex byte> [more hex..]\n"
417" or: %s <--help|-h>\n"
418"\n"
419"Options:\n"
420" --address|-a <address>\n"
421" The base address. Default: 0\n"
422" --max-bytes|-b <bytes>\n"
423" The maximum number of bytes to disassemble. Default: 1GB\n"
424" --cpumode|-c <16|32|64>\n"
425" The cpu mode. Default: 32\n"
426" --listing|-l, --no-listing|-L\n"
427" Enables or disables listing mode. Default: --no-listing\n"
428" --offset|-o <offset>\n"
429" The file offset at which to start disassembling. Default: 0\n"
430" --style|-s <default|yasm|masm>\n"
431" The assembly output style. Default: default\n"
432" --undef-op|-u <fail|all|db>\n"
433" How to treat undefined opcodes. Default: fail\n"
434 , argv0, argv0);
435 return 1;
436}
437
438
439int main(int argc, char **argv)
440{
441 RTR3InitExe(argc, &argv, 0);
442 const char * const argv0 = RTPathFilename(argv[0]);
443
444 /* options */
445 uint64_t uAddress = 0;
446 uint64_t uHighlightAddr = UINT64_MAX;
447 ASMSTYLE enmStyle = kAsmStyle_Default;
448 UNDEFOPHANDLING enmUndefOp = kUndefOp_Fail;
449 bool fListing = true;
450 DISCPUMODE enmCpuMode = DISCPUMODE_32BIT;
451 RTFOFF off = 0;
452 RTFOFF cbMax = _1G;
453 bool fHexBytes = false;
454
455 /*
456 * Parse arguments.
457 */
458 static const RTGETOPTDEF g_aOptions[] =
459 {
460 { "--address", 'a', RTGETOPT_REQ_UINT64 },
461 { "--cpumode", 'c', RTGETOPT_REQ_UINT32 },
462 { "--bytes", 'b', RTGETOPT_REQ_INT64 },
463 { "--listing", 'l', RTGETOPT_REQ_NOTHING },
464 { "--no-listing", 'L', RTGETOPT_REQ_NOTHING },
465 { "--offset", 'o', RTGETOPT_REQ_INT64 },
466 { "--style", 's', RTGETOPT_REQ_STRING },
467 { "--undef-op", 'u', RTGETOPT_REQ_STRING },
468 { "--hex-bytes", 'x', RTGETOPT_REQ_NOTHING },
469 };
470
471 int ch;
472 RTGETOPTUNION ValueUnion;
473 RTGETOPTSTATE GetState;
474 RTGetOptInit(&GetState, argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), 1, RTGETOPTINIT_FLAGS_OPTS_FIRST);
475 while ( (ch = RTGetOpt(&GetState, &ValueUnion))
476 && ch != VINF_GETOPT_NOT_OPTION)
477 {
478 switch (ch)
479 {
480 case 'a':
481 uAddress = ValueUnion.u64;
482 break;
483
484 case 'b':
485 cbMax = ValueUnion.i64;
486 break;
487
488 case 'c':
489 if (ValueUnion.u32 == 16)
490 enmCpuMode = DISCPUMODE_16BIT;
491 else if (ValueUnion.u32 == 32)
492 enmCpuMode = DISCPUMODE_32BIT;
493 else if (ValueUnion.u32 == 64)
494 enmCpuMode = DISCPUMODE_64BIT;
495 else
496 {
497 RTStrmPrintf(g_pStdErr, "%s: Invalid CPU mode value %RU32\n", argv0, ValueUnion.u32);
498 return 1;
499 }
500 break;
501
502 case 'h':
503 return Usage(argv0);
504
505 case 'l':
506 fListing = true;
507 break;
508
509 case 'L':
510 fListing = false;
511 break;
512
513 case 'o':
514 off = ValueUnion.i64;
515 break;
516
517 case 's':
518 if (!strcmp(ValueUnion.psz, "default"))
519 enmStyle = kAsmStyle_Default;
520 else if (!strcmp(ValueUnion.psz, "yasm"))
521 enmStyle = kAsmStyle_yasm;
522 else if (!strcmp(ValueUnion.psz, "masm"))
523 {
524 enmStyle = kAsmStyle_masm;
525 RTStrmPrintf(g_pStdErr, "%s: masm style isn't implemented yet\n", argv0);
526 return 1;
527 }
528 else
529 {
530 RTStrmPrintf(g_pStdErr, "%s: unknown assembly style: %s\n", argv0, ValueUnion.psz);
531 return 1;
532 }
533 break;
534
535 case 'u':
536 if (!strcmp(ValueUnion.psz, "fail"))
537 enmUndefOp = kUndefOp_Fail;
538 else if (!strcmp(ValueUnion.psz, "all"))
539 enmUndefOp = kUndefOp_All;
540 else if (!strcmp(ValueUnion.psz, "db"))
541 enmUndefOp = kUndefOp_DefineByte;
542 else
543 {
544 RTStrmPrintf(g_pStdErr, "%s: unknown undefined opcode handling method: %s\n", argv0, ValueUnion.psz);
545 return 1;
546 }
547 break;
548
549 case 'x':
550 fHexBytes = true;
551 break;
552
553 case 'V':
554 RTPrintf("$Revision: $\n");
555 return 0;
556
557 default:
558 return RTGetOptPrintError(ch, &ValueUnion);
559 }
560 }
561 int iArg = GetState.iNext - 1; /** @todo Not pretty, add RTGetOptInit flag for this. */
562 if (iArg >= argc)
563 return Usage(argv0);
564
565 int rc = VINF_SUCCESS;
566 if (fHexBytes)
567 {
568 /*
569 * Convert the remaining arguments from a hex byte string into
570 * a buffer that we disassemble.
571 */
572 size_t cb = 0;
573 uint8_t *pb = NULL;
574 for ( ; iArg < argc; iArg++)
575 {
576 char ch2;
577 const char *psz = argv[iArg];
578 while (*psz)
579 {
580 /** @todo this stuff belongs in IPRT, same stuff as mac address reading. Could be reused for IPv6 with a different item size.*/
581 /* skip white space, and for the benefit of linux panics '<' and '>'. */
582 while (RT_C_IS_SPACE(ch2 = *psz) || ch2 == '<' || ch2 == '>')
583 {
584 if (ch2 == '<')
585 uHighlightAddr = uAddress + cb;
586 psz++;
587 }
588 if (!ch2)
589 break;
590
591 /* one digit followed by a space or EOS, or two digits. */
592 int iNum = HexDigitToNum(*psz++);
593 if (iNum == -1)
594 return 1;
595 if (!RT_C_IS_SPACE(ch2 = *psz) && ch2 != '\0' && ch2 != '>')
596 {
597 int iDigit = HexDigitToNum(*psz++);
598 if (iDigit == -1)
599 return 1;
600 iNum = iNum * 16 + iDigit;
601 }
602
603 /* add the byte */
604 if (!(cb % 4 /*64*/))
605 {
606 pb = (uint8_t *)RTMemRealloc(pb, cb + 64);
607 if (!pb)
608 {
609 RTPrintf("%s: error: RTMemRealloc failed\n", argv[0]);
610 return 1;
611 }
612 }
613 pb[cb++] = (uint8_t)iNum;
614 }
615 }
616
617 /*
618 * Disassemble it.
619 */
620 rc = MyDisasmBlock(argv0, enmCpuMode, uAddress, uHighlightAddr, pb, cb, enmStyle, fListing, enmUndefOp);
621 }
622 else
623 {
624 /*
625 * Process the files.
626 */
627 for ( ; iArg < argc; iArg++)
628 {
629 /*
630 * Read the file into memory.
631 */
632 void *pvFile;
633 size_t cbFile;
634 rc = RTFileReadAllEx(argv[iArg], off, cbMax, RTFILE_RDALL_O_DENY_NONE, &pvFile, &cbFile);
635 if (RT_FAILURE(rc))
636 {
637 RTStrmPrintf(g_pStdErr, "%s: %s: %Rrc\n", argv0, argv[iArg], rc);
638 break;
639 }
640
641 /*
642 * Disassemble it.
643 */
644 rc = MyDisasmBlock(argv0, enmCpuMode, uAddress, uHighlightAddr, (uint8_t *)pvFile, cbFile, enmStyle, fListing, enmUndefOp);
645 if (RT_FAILURE(rc))
646 break;
647 }
648 }
649
650 return RT_SUCCESS(rc) ? 0 : 1;
651}
652
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette