VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65947

Last change on this file since 65947 was 65947, checked in by vboxsync, 8 years ago

IEMAllInstructionsPython.py: python 3 fix

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 112.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 65947 2017-03-06 12:47:09Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 65947 $"
35
36# Standard python imports.
37import os
38import re
39import sys
40
41## Only the main script needs to modify the path.
42#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
43# 'ValidationKit');
44#sys.path.append(g_ksValidationKitDir);
45#
46#from common import utils; - Windows build boxes doesn't have pywin32.
47
# Python 3 hacks:
# Python 3 has no builtin named 'long'; alias it to int so that code calling
# long() keeps working on both major versions.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
51
52
## X86_EFL_XXX constant names mapped to their mask values.
## The trailing comment on each entry gives the corresponding C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
75
## \@op[1-4] locations
## Only the keys carry information here; every value is an empty list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
96
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':   ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':   ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':   ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':   ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),

    # ModR/M.rm - memory only.
    'Ma':   ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.

    # ModR/M.reg
    'Gb':   ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':   ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':   ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),

    # Immediate values.
    'Ib':   ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':   ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':   ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':   ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':   ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':   ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':   ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':   ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':   ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':   ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':   ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':   ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':   ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':   ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register (location 'rFLAGS').
    'Fv':   ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':   ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':  ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':   ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':   ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':   ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':   ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':   ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':   ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
158
159# IDX_ParseFixedReg
160# IDX_ParseVexDest
161
162
## IEMFORM_XXX mappings.
## Each value is ( sEncoding, [ sWhere1, ... ] ) where the sWhere strings are
## g_kdOpLocations keys given in operand order.  'FIXED' has None in place of
## the location list.
## NOTE(review): 'ModR/M+VEX' is not one of the g_kdEncodings keys - confirm
##               that this is intended.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':        ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':    ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M',     [ 'rm', ], ),
    'R':        ( 'ModR/M',     [ 'reg', ], ),
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ),
    'FIXED':    ( 'fixed',      None, )
};
179
## \@oppfx values.
## Keys are the prefix bytes as lower case hex strings; the values are empty lists.
g_kdPrefixes = {
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
186
## Special \@opcode tag values.
## These are the non-hex spellings; the values are empty lists.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
196
## Valid values for \@openc
## The first list entry is a BS3CG1ENC_XXX constant name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
203
## \@opunused, \@opinvalid, \@opinvlstyle
## Describes how much the CPU decodes before treating the encoding as invalid.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
212
## Known CPU names; each value is an empty tuple.
## NOTE(review): presumably the valid values for the minimum CPU tag (see
##               Instruction.sMinCpu) - confirm against the parser.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
220
## \@opcpuid
## Feature names mapped to X86_CPUID_XXX constant names.
g_kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the next entry names DTES64 rather than a PCLMUL constant;
    #               looks like an off-by-one-row slip - confirm against x86.h.
    'pclmul':   'X86_CPUID_FEATURE_ECX_DTES64',
    # NOTE(review): names CPLDS rather than a MONITOR constant - confirm.
    'monitor':  'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    # NOTE(review): names TM2 rather than an SMX constant - confirm.
    'smx':      'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD features.
    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
264
## \@ophints values.
## Hint names mapped to DISOPTYPE_XXX constant names ('ignores_op_size' maps to
## an empty string).
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
};
297
298
299def _isValidOpcodeByte(sOpcode):
300 """
301 Checks if sOpcode is a valid lower case opcode byte.
302 Returns true/false.
303 """
304 if len(sOpcode) == 4:
305 if sOpcode[:2] == '0x':
306 if sOpcode[2] in '0123456789abcdef':
307 if sOpcode[3] in '0123456789abcdef':
308 return True;
309 return False;
310
311
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): 'vvvvv' below presumably refers to the 5-bit opcode map
    ##               select field of the VEX/EVEX/XOP prefix - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        sName is the map name, asLeadOpcodes the lead opcode bytes as '0xNN'
        strings (None means no lead bytes), sSelector a kdSelectors key,
        sEncoding a kdEncodings key and sDisParse either None or a string
        starting with 'IDX_Parse'.  Invalid arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  The selectors that fix modrm.mod assert that
        the two top bits (mask 0xc0) of the byte agree with the selector.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        # modrm.mod occupies bits 7:6, so mod == 0y11 means both bits of the 0xc0 mask are set.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions, skipping those without any opcode.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit lower case hex words are appended with their
        underscore, other words are appended capitalized (with 'grp'/'map'
        turned into 'Grp'/'Map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
448
449
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        ## The normal value sizes in bytes, in ascending order.
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the lower case hex digits of the non-negative integer iValue,
        without the '0x' prefix and without the 'L' suffix Python 2 appends
        to long integers.
        """
        sHex = hex(iValue);
        assert sHex[:2] == '0x';
        return sHex[2:].rstrip('L');

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class only produces the first form.

        The bytearray holds the value in little endian byte order, padded to
        the smallest size in acbSizes which can hold it (or to a multiple of
        the largest size if it fits none).  Negative values are encoded in
        two's complement.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer (int() yields a long on Python 2 when needed).
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to hex digits.  Negative values needs to be manually converted
        # to two's complement: -n is the bitwise inversion of (n - 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
            sPad = '0';
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # The top digit must have the sign bit set, otherwise a value like
            # -129 ('7f' after inversion) would read back as positive.
            if sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;
            sPad = 'f';

        # Figure the number of digits of the natural size and pad up to it.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            # Too big for the normal sizes, round up to a multiple of the largest one.
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        sHex = (sPad * (cNaturalDigits - cDigits)) + sHex;

        # Convert to bytearray, least significant byte first, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        return False;
570
571
class TestTypeEflags(TestType):
    """
    EFLAGS/RFLAGS/FLAGS value type.

    Values are comma separated lists of flag names which get() looks up in
    SimpleParser.kdEFlags.
    """

    ## Flag names which stand for a flag being zero.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list to what TestType.get produces for the masks.

        Names translating to a '!' prefixed constant are collected in a clear
        mask, the others in a set mask.  Returns just the set mask entry when
        nothing is to be cleared, otherwise an (AND, OR) pair where the AND
        entry is the inverted clear mask.

        Raises BadValue on unknown flag names.
        """
        fToClear = 0;
        fToSet   = 0;
        for sFlagName in sValue.split(','):
            sEflConstant = SimpleParser.kdEFlags.get(sFlagName, None);
            if sEflConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlagName, sValue))
            if sEflConstant[0] != '!':
                fToSet   |= g_kdX86EFlagsConstants[sEflConstant];
            else:
                fToClear |= g_kdX86EFlagsConstants[sEflConstant[1:]];

        aoOrValue = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            # Nothing to clear, so a plain value will do.
            assert self.isAndOrPair(sValue) == False;
            return aoOrValue;

        aoAndValue = TestType.get(self, '%#x' % (~fToClear))
        assert self.isAndOrPair(sValue) == True;
        return (aoAndValue[0], aoOrValue[0]);

    def isAndOrPair(self, sValue):
        """
        Returns True if any of the kdZeroValueFlags names occurs anywhere in
        sValue (plain substring search), otherwise False.
        """
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags.keys());
607
608
609
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance records a field name, an assignment operator, the value
    string and the value type name as given to the constructor.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' relies on the TestType default of fUnsigned=True and
    ##               thus behaves like 'uint' - confirm that this is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:     ( default type, tbd, )
        # Operands.
        'op1':      ( 'uint', '', ), ## \@op1
        'op2':      ( 'uint', '', ), ## \@op2
        'op3':      ( 'uint', '', ), ## \@op3
        'op4':      ( 'uint', '', ), ## \@op4
        # Flags.
        'efl':      ( 'efl',  '', ),
        # 8-bit GPRs.
        'al':       ( 'uint', '', ),
        'cl':       ( 'uint', '', ),
        'dl':       ( 'uint', '', ),
        'bl':       ( 'uint', '', ),
        'ah':       ( 'uint', '', ),
        'ch':       ( 'uint', '', ),
        'dh':       ( 'uint', '', ),
        'bh':       ( 'uint', '', ),
        'r8l':      ( 'uint', '', ),
        'r9l':      ( 'uint', '', ),
        'r10l':     ( 'uint', '', ),
        'r11l':     ( 'uint', '', ),
        'r12l':     ( 'uint', '', ),
        'r13l':     ( 'uint', '', ),
        'r14l':     ( 'uint', '', ),
        'r15l':     ( 'uint', '', ),
        # 16-bit GPRs.
        'ax':       ( 'uint', '', ),
        'dx':       ( 'uint', '', ),
        'cx':       ( 'uint', '', ),
        'bx':       ( 'uint', '', ),
        'sp':       ( 'uint', '', ),
        'bp':       ( 'uint', '', ),
        'si':       ( 'uint', '', ),
        'di':       ( 'uint', '', ),
        'r8w':      ( 'uint', '', ),
        'r9w':      ( 'uint', '', ),
        'r10w':     ( 'uint', '', ),
        'r11w':     ( 'uint', '', ),
        'r12w':     ( 'uint', '', ),
        'r13w':     ( 'uint', '', ),
        'r14w':     ( 'uint', '', ),
        'r15w':     ( 'uint', '', ),
        # 32-bit GPRs.
        'eax':      ( 'uint', '', ),
        'edx':      ( 'uint', '', ),
        'ecx':      ( 'uint', '', ),
        'ebx':      ( 'uint', '', ),
        'esp':      ( 'uint', '', ),
        'ebp':      ( 'uint', '', ),
        'esi':      ( 'uint', '', ),
        'edi':      ( 'uint', '', ),
        'r8d':      ( 'uint', '', ),
        'r9d':      ( 'uint', '', ),
        'r10d':     ( 'uint', '', ),
        'r11d':     ( 'uint', '', ),
        'r12d':     ( 'uint', '', ),
        'r13d':     ( 'uint', '', ),
        'r14d':     ( 'uint', '', ),
        'r15d':     ( 'uint', '', ),
        # 64-bit GPRs.
        'rax':      ( 'uint', '', ),
        'rdx':      ( 'uint', '', ),
        'rcx':      ( 'uint', '', ),
        'rbx':      ( 'uint', '', ),
        'rsp':      ( 'uint', '', ),
        'rbp':      ( 'uint', '', ),
        'rsi':      ( 'uint', '', ),
        'rdi':      ( 'uint', '', ),
        'r8':       ( 'uint', '', ),
        'r9':       ( 'uint', '', ),
        'r10':      ( 'uint', '', ),
        'r11':      ( 'uint', '', ),
        'r12':      ( 'uint', '', ),
        'r13':      ( 'uint', '', ),
        'r14':      ( 'uint', '', ),
        'r15':      ( 'uint', '', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':   ( 'uint', '', ),
        'oz.rdx':   ( 'uint', '', ),
        'oz.rcx':   ( 'uint', '', ),
        'oz.rbx':   ( 'uint', '', ),
        'oz.rsp':   ( 'uint', '', ),
        'oz.rbp':   ( 'uint', '', ),
        'oz.rsi':   ( 'uint', '', ),
        'oz.rdi':   ( 'uint', '', ),
        'oz.r8':    ( 'uint', '', ),
        'oz.r9':    ( 'uint', '', ),
        'oz.r10':   ( 'uint', '', ),
        'oz.r11':   ( 'uint', '', ),
        'oz.r12':   ( 'uint', '', ),
        'oz.r13':   ( 'uint', '', ),
        'oz.r14':   ( 'uint', '', ),
        'oz.r15':   ( 'uint', '', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        # The field must be a kdFields key and the operator one of kasOperators;
        # sValue and sType are stored without being checked here.
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
735
736
class TestSelector(object):
    """
    A single selector of an instruction test, i.e. a comparison of one
    selector variable with one of its valid values.
    """
    ## The comparison operators a selector may use.
    kasCompareOps = [ '==', '!=' ];
    ## The selector variables.  Each one maps its valid values to a name.
    kdVariables = {
        # Operand size.
        'size': {
            'o16': 'size_o16',
            'o32': 'size_o32',
            'o64': 'size_o64',
        },
        # Execution ring.
        'ring': {
            '0': 'ring_0',
            '1': 'ring_1',
            '2': 'ring_2',
            '3': 'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64': 'code_64bit',
            '32': 'code_32bit',
            '16': 'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86': 'mode_v86',
            'smm': 'mode_smm',
            'vmx': 'mode_vmx',
            'svm': 'mode_svm',
        },
        # paging on/off
        'paging': {
            'on': 'paging_on',
            'off': 'paging_off',
        },
    };
    ## Shorthand predicates and the variable expressions they stand for.
    kdPredicates = {
        'o16': 'size==o16',
        'o32': 'size==o32',
        'o64': 'size==o64',
        'ring0': 'ring==0',
        '!ring0': 'ring==1..3',
        'ring1': 'ring==1',
        'ring2': 'ring==2',
        'ring3': 'ring==3',
        'user': 'ring==3',
        'supervisor': 'ring==0..2',
        'real': 'mode==real',
        'prot': 'mode==prot',
        'long': 'mode==long',
        'v86': 'mode==v86',
        'smm': 'mode==smm',
        'vmx': 'mode==vmx',
        'svm': 'mode==svm',
        'paging': 'paging==on',
        '!paging': 'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the variable, operator and value after asserting that the
        variable is a kdVariables key, the operator is in kasCompareOps and
        the value is valid for the variable.
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
813
814
class InstructionTest(object):
    """
    A test for an instruction.

    Holds the instruction it was created for plus three lists which all
    start out empty: inputs, outputs and selectors.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction)
        self.oInstr      = oInstr;  ##< The instruction given to the constructor.
        self.aoInputs    = list();
        self.aoOutputs   = list();
        self.aoSelectors = list();  # type: list(TestSelector)
825
826
827class Operand(object):
828 """
829 Instruction operand.
830 """
831
832 def __init__(self, sWhere, sType):
833 assert sWhere in g_kdOpLocations, sWhere;
834 assert sType in g_kdOpTypes, sType;
835 self.sWhere = sWhere; ##< g_kdOpLocations
836 self.sType = sType; ##< g_kdOpTypes
837
838 def usesModRM(self):
839 """ Returns True if using some form of ModR/M encoding. """
840 return self.sType[0] in ['E', 'G', 'M'];
841
842
843
class Instruction(object):
    """
    Instruction.

    A plain record of what is known about one instruction.  The constructor
    only notes down the source file and line it was created for; all other
    attributes start out with the defaults below.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode        = None;
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs, leaving
        out the fields which are None (or at their inactive default).  It is
        wrapped in angle brackets when fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints)]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Only fields with a value make it into the string.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            sRet = '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Two forms are understood: hex ('0xNN', returned as is) and '/N' with
        N being a single digit, which is returned shifted into the modrm.reg
        position (bits 5:3).

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));

        # Full hex byte form.
        if self.sOpcode[:2] == '0x':
            return int(self.sOpcode, 16);

        # The /r form.  The length is checked first so that too short strings
        # end up as 'unsupported' below instead of raising IndexError here.
        if len(self.sOpcode) == 2 and self.sOpcode[0] == '/' and self.sOpcode[1].isdigit():
            return int(self.sOpcode[1:]) << 3;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (self.sOpcode, self,));
972
973
974
## All the instructions (list of Instruction objects, appended to by
## SimpleParser.addInstruction).
g_aoAllInstructions = list(); # type: list
977
## Instruction maps, keyed by map name.
## Each value is an InstructionMap constructed with the same name as its key.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Note: grp1_80 is selected by the ModR/M reg field just like grp1_81..83.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # (The 'three0f38' entry used to be listed twice; a dictionary keeps only one.)
    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1038
1039
1040
class ParserException(Exception):
    """ Exception carrying a parser error message (see SimpleParser.raiseError). """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1045
1046
1047class SimpleParser(object):
1048 """
1049 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1050 """
1051
## @name Parser state.
## @{
kiCode         = 0;     ##< The state the parser starts out in (see __init__).
kiCommentMulti = 1;     ##< Presumably: inside a multi-line comment - confirm against the line parser.
## @}
1057
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the name used as prefix in error messages, asLines is stored
    as the input lines, and sDefaultMap must be a key in g_dInstructionMaps
    (asserted); it selects the map applied to instructions without any.
    """
    # Input and position tracking.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The default instruction map.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation patterns.  Macro names, mnemonics and stats names are all
    # plain C identifiers and therefore share the same expression.
    sReIdentifier       = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name -> handler method.  Several tags share a handler, which gets
    # the tag name passed as its first argument.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };

    # Errors collected by error() and errorComment().
    self.asErrors       = [];
1114
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number (self.iLine).
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
1120
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
1126
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False so callers can 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1134
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment in self.asErrors;
    the reported line number is self.iCommentLine + iLineInComment.
    Always returns False so callers can 'return self.errorComment(...)'.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1142
def printErrors(self):
    """
    Writes the collected errors (if any) to stderr in one go.
    Returns the number of collected errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1151
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1157
1158
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it both in
    the global g_aoAllInstructions list and in self.aoCurInstrs.

    The instruction line defaults to the current line (self.iLine) when
    iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1167
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string
    ('mnemonic_type1_type2').

    Nothing is done if the instruction already has a mnemonic.  Operands are
    only derived if the instruction has none yet.  Returns False if one of
    the operand type words isn't in g_kdOpTypes (operands derived before the
    unknown word are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or len(oInstr.aoOperands) != 0:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Unknown operand type, quietly give up (no error is recorded).
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1183
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by filling in defaults for
    the attributes that weren't specified and adding it to its maps.

    iLine is recorded as the line where the instruction was completed; the
    instruction must not have been completed before (asserted).
    Always returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: There is currently no processing that is specific to either
    #       specified (cOpTags > 0) or unspecified legacy instructions
    #       (like '/** Opcode 0x0f 0x00 /0. */' + FNIEMOPRM_DEF(iemOp_Grp6_sldt)),
    #       only the common defaults below.

    # Guess mnemonic and operands from stats (preferred) or the function
    # name if the former is missing.
    if oInstr.sMnemonic is None:
        sDeriveFrom = oInstr.sStats;
        if sDeriveFrom is None and oInstr.sFunction is not None:
            sDeriveFrom = oInstr.sFunction.replace('iemOp_', '');
        if sDeriveFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sDeriveFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or failing
    # that from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sStats = '_'.join([oInstr.sMnemonic,] + asTypes);

    # Derive the IEM function name from mnemonic and operand types, or
    # failing that from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sFunction = '_'.join(['iemOp_' + oInstr.sMnemonic,] + asTypes);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands (left unset if the first operand
    # doesn't use ModR/M).
    if oInstr.sEncoding is None:
        cOperands = len(oInstr.aoOperands);
        if cOperands == 0:
            oInstr.sEncoding = 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            fVex = cOperands >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVex else 'ModR/M';

    # Apply the default map if none was given and then add the instruction
    # to all its maps.
    if len(oInstr.aoMaps) == 0:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    return True;
1264
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the current
    instruction list and comment.

    The completion line is the current line (self.iLine), unless
    iLineInComment is given in which case it is relative to the start of
    the current comment.  Updates the stub and instruction totals.
    Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.aoCurInstrs = [];
    self.sComment    = '';
    return True;
1279
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all the current instructions to oValue.

    Unless fOverwrite is True, only attributes that currently are None get
    replaced (empty strings and other values are left alone).
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1291
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue, padding the array with None entries as needed.

    Unless fOverwrite is True, only entries that currently are None get
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1303
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, all the words following 'Opcode' are
    recorded (space separated, '0X' prefixes normalized to '0x') as the
    sRawOldOpcodes attribute of the current instructions via
    setInstrunctionAttrib (i.e. without overwriting).

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1319
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed,
    creating one at the tag line if there is none.

    Bumps the op-tag count of every current instruction, counting those
    getting their first tag in self.cTotalTagged.  Returns the last of the
    current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1;
        if oCurInstr.cOpTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1329
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each section is a list of lines; the lines are stripped and joined by a
    single space.  Empty sections are skipped.

    Returns list of strings, one section per string.
    """
    asRet = [];
    for asLines in aasSections:
        if len(asLines) > 0:
            asRet.append(' '.join([sLine.strip() for sLine in asLines]));
    return asRet;
1341
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of each section are stripped and joined by sLineSep, the
    sections are then joined by sSectionSep.  Empty sections are skipped
    and don't produce any separator, so the result never starts with a
    section separator.
    """
    asFlatSections = [];
    for asLines in aasSections:
        if len(asLines) > 0:
            asFlatSections.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return sSectionSep.join(asFlatSections);
1359
1360
1361
1362 ## @name Tag parsers
1363 ## @{
1364
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opbrief
    Value:      Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating dot is added if missing.  Records an error and returns
    False if the value is invalid or a brief description was already set,
    otherwise stores it in oInstr.sBrief and returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if len(sBrief) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot that is followed by a space (or is the last
    # character); it may only be the last character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1394
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdesc
    Value:      Text description, multiple sections, appended.

    It is used to describe instructions.

    The sections are flattened (one string per section) and appended to
    oInstr.asDescSections.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if len(aasSections) > 0:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));

    _ = sTag; _ = iEndLine;
    return True;
1409
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmnemonic
    Value:      mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Records an error and returns False if the value is invalid or the
    instruction already has a mnemonic, otherwise sets oInstr.sMnemonic
    and returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Set it, refusing to overwrite.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));
    oInstr.sMnemonic = sMnemonic;
    return True;
1432
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:       \@op1, \@op2, \@op3, \@op4
    Value:      [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations and
    defaults to the location associated with the type in g_kdOpTypes.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    Records an error and returns False if the value is invalid or the
    operand was already specified, otherwise stores a new Operand in
    oInstr.aoOperands (index taken from the last tag character) and
    returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1482
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The map names are comma separated (lines and sections count as
    separators too); whitespace around the names and empty list items are
    ignored.  Records an error and returns False if no map is given, a name
    isn't in g_dInstructionMaps or the instruction already is in one of
    the maps.  Otherwise the maps are appended to oInstr.aoMaps and True is
    returned (a name given twice in the same value is recorded as an error
    without failing).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];     # A trailing comma on a line yields an empty item.
    if len(asMaps) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1517
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oppfx
    Value:      none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower cased and two character values get a '0x' prefix
    added.  'none' results in a None prefix.  Records an error and returns
    False if there isn't exactly one value, the value isn't a valid opcode
    byte found in g_kdPrefixes, or a prefix was already set for the
    instruction.  Otherwise sets oInstr.sPrefix and returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1553
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcode
    Value:      0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value must either be a key in g_kdSpecialOpcodes or pass
    _isValidOpcodeByte.  Records an error and returns False if it isn't,
    or if the instruction already has an opcode.  Otherwise sets
    oInstr.sOpcode and returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if sOpcode not in g_kdSpecialOpcodes and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it, refusing to overwrite.
    sOldOpcode = oInstr.sOpcode;
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;
    return True;
1577
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings or
    g_dInstructionMaps, or if it passes _isValidOpcodeByte.  Records an
    error and returns False if it isn't accepted or the instruction already
    has an encoding.  Otherwise sets oInstr.sEncoding and returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted = sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, refusing to overwrite.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;
    return True;
1604
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps the flag name to the X86_EFL_XXX constant name, with a '!' prefix for
## the names denoting the cleared state of a flag.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    # Note: PF is set when the result has even parity, so 'pe' is the set state.
    'po':  '!X86_EFL_PF',   ##< Parity Odd.
    'pe':   'X86_EFL_PF',   ##< Parity Even.

    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation ('cf' is the same in both notations, see above):
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
1651
## EFlags tag to Instruction attribute name (used by parseTagOpEFlags to
## pick the attribute to check and set).
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
1660
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:       \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:      <eflags specifier>

    The value is a comma separated list of kdEFlags names (lines and
    sections count as separators too), or just 'none' for an empty list.
    Records an error for each invalid name and returns False if there were
    any, or if the attribute corresponding to the tag (kdOpFlagToAttr)
    already was set.  Otherwise stores the list and returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in self.kdEFlags:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in self.kdEFlags:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to overwrite.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
1692
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The value may also be just 'none' for an empty list.  Records an error
    for each name not in g_kdHints and returns False if there were any.
    Otherwise the hints are added to oInstr.dHints (duplicates are recorded
    as errors without failing) and True is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() leaves no whitespace in the words, so no stripping is needed.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1726
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdisenum
    Value:      OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Records an error if there isn't exactly one word in the value; parsing
    then continues with the first word if there is one, otherwise False is
    returned.  Also records an error and returns False if the word doesn't
    match the OP_XXXX pattern or the instruction already has an enum value.
    Otherwise sets oInstr.sDisEnum and returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it, refusing to overwrite.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;
    return True;
1755
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    Records an error and returns False if no value is given or the name
    isn't in g_kdCpuNames.  More than one word is recorded as an error, but
    parsing continues with the first word.  Trying to replace an already
    set, different name is recorded as an error without failing.
    Otherwise sets oInstr.sMinCpu.  Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if len(asCpus) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (The value belongs to the instruction, not the parser.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
1785
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags names,
    or just 'none' for an empty list.  Records an error for each invalid
    name and returns False if there were any.  Otherwise the names are
    appended to oInstr.asCpuIds (duplicates are recorded as errors without
    failing) and True is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() leaves no whitespace in the words, so no stripping is needed.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1819
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is accepted, and only if
    the instruction does not have a group yet.  Returns True on success,
    otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must consist of a single word.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        sMsg = '%s: exactly one group, please: %s' % (sTag, asWords,);
        return self.errorComment(iTagLine, sMsg);

    # That word must look like a group name.
    sName = asWords[0];
    if not self.oReGroupName.match(sName):
        sMsg = '%s: invalid group name: %s (valid: %s)' % (sTag, sName, self.oReGroupName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Never replace a group which has already been set.
    if oInstr.sGroup is None:
        oInstr.sGroup = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sName,));
1845
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one word from g_kdInvalidStyles is accepted, and only one of
    the three tags may be given per instruction.  Returns True on success,
    otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must be a single, known style name.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    sNewStyle = asWords[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)'
                                 % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # The style can only be set once, whichever of the three tags is used.
    sOldStyle = oInstr.sInvlStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvlStyle = sNewStyle;

    # Two of the tags additionally raise a flag on the instruction.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);

    return True;
1883
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optest
    Value:      [<selectors> / ] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  Tests which parse cleanly are
    appended to the instruction's aoTests, the others are reported via
    errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # Walk backwards: outputs come last, then the inputs, then the selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.
            # Note! Identity checks are required here.  The lists start out
            #       empty and thus equal, so a value comparison cannot tell
            #       which one is currently being filled.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occur once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occur once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  list(TestSelector.kdVariables[sVariable].keys()),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, list(TestSelector.kdVariables.keys()),));
                    break; # Only the first operator found in the expression counts.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # An explicit ':type' suffix overrides the default type of the field.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                        oItem = TestInOut(sField, sOp, sValue, sType);
                                    else:
                                        self.errorComment(iTagLine,
                                                          '%s: and-or %s value "%s" can only be used with "&|="'
                                                          % ( sTag, sDesc, sItem, ));
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                      % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                  % ( sTag, sDesc, sType, sItem, list(TestInOut.kdTypes.keys()),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                              % ( sTag, sDesc, sField, sItem, list(TestInOut.kdFields.keys()),));
                        break; # Only the first operator found in the item counts.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test if it parsed cleanly, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2022
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReFunctionName and the instruction must not
    have a function name yet.  Returns True on success, otherwise the
    errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sFunction, self.oReFunctionName.pattern));

    # Never replace a name which has already been set.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;

    _ = iEndLine;
    return True;
2050
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReStatsName and the instruction must not have
    a statistics name yet.  Returns True on success, otherwise the
    errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sStats, self.oReStatsName.pattern));

    # Never replace a name which has already been set.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;

    _ = iEndLine;
    return True;
2078
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result, or the errorComment() result if
    the tag was given a value.
    """
    _ = iEndLine;

    # The tag takes no value, so anything left after flattening is an error.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2091
2092 ## @}
2093
2094
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected (returns
    False).  Otherwise the comment is split into tags, each tag with its
    value sections is handed to the matching self.dTagHandlers entry, and
    True is returned.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split the comment into lines, dropping the leading asterisks and
    # spaces, then trim empty lines at both ends.  Each empty line dropped
    # at the top moves the comment start line down by one.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        del asLines[0];
        self.iCommentLine += 1;

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text in front of the first tag is
    # collected under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so process the previous one first.
            #
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Set up the new tag.  Whatever follows the tag word on the
            # same line becomes the first line of its first section.
            #
            asParts      = sLine.split(None, 1);
            sCurTag      = asParts[0].lower();
            asCurSection = asParts[1:];
            aasSections  = [ asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a section, provided the section has content.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.
    #
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and len(sFlatDefault) > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2191
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    If the invocation runs past the end of sInvocation, the following lines
    of self.asLines (starting at self.iLine) are appended as needed.

    On errors reported thru self.error() the result is still a tuple, but
    with None instead of the argument list, so callers can always unpack it.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): assumes the parser class provides raiseError() and
        #               that it raises; confirm against the class.
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.  Only commas and the closing parenthesis at depth one
    # terminate an argument, nested parentheses are part of the argument.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines until there is a character to look at.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (len(sInvocation), None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2233
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation if the next non-blank character is an opening
    parenthesis.  An invocation which fails to parse is treated as not found.
    """
    offHit = sCode.find(sMacro);
    # Note! startswith() copes with the macro name being the last thing in sCode.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oRet = self.parseMacroInvocation(sCode[offHit:]);
        # Only unpack real hits; parse failures need not come back as a
        # tuple holding an argument list.
        if isinstance(oRet, tuple) and oRet[1] is not None:
            offAfter, asRet = oRet;
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2243
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode.

    Returns the second element of the findAndParseMacroInvocationEx result,
    i.e. the argument list as per parseMacroInvocation if found and None if
    not.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
2249
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro against sCode, in the given order.

    Returns the findAndParseMacroInvocation result for the first name which
    yields something other than None, or None if none of them does.
    """
    # The generator is lazy, so no lookups are made beyond the first hit.
    oResults = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oResults if asArgs is not None), None);
2259
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last of
    the current instructions (one is added if there is none).  Problems are
    reported thru self.error().  The sAsm argument is currently not used.
    Always returns True.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if len(self.aoCurInstrs) == 0:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        # The line of the previous macro is recorded in the instruction.
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        asFormWhere = g_kdIemForms[sForm][1];
        if asFormWhere is not None:
            if len(asFormWhere) > len(oInstr.aoOperands):
                # Report the mismatch instead of indexing past the operand list.
                self.error('%s: a_Form=%s covers %u operand locations, the instruction only has %u operands'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands),));
            else:
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint arguments are '|' separated lists of prefixed constants, or '0'.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    return True;
2371
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing the lot to workerIemOpMnemonicEx, whose result is returned.
    """
    if not asOperands:
        # No operands: The bare mnemonic serves as both names.
        return self.workerIemOpMnemonicEx(sMacro, sLower, sLower, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sLower + '_' + '_'.join(asOperands), sLower + ' ' + ','.join(asOperands),
                                      sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2381
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if a FNIEMOP_XXX function definition or stub macro was
    found and processed.  Returns False otherwise, which includes the case
    where only IEMOP_MNEMONIC* macros were processed.
    """
    #
    # Nothing to scan unless there is an opening parenthesis past the
    # first position.
    #
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];
        sMacro    = asArgs[0];

        if len(self.aoCurInstrs) == 0:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        fStub = sMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # Nothing else follows a stub, so the instructions are complete.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<n>,] a_fDisHints, a_fIemHints)
    # for n = 0 thru 4.  The operands start at argument 6 (argument 0 being
    # the macro name) and are followed by the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

    #
    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<n>,] a_fDisHints, a_fIemHints)
    # for n = 0 thru 4.  Same layout, except that the operands start at
    # argument 4.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    return False;
2474
2475
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, switching between code and
    multi-line comment state.  Completed comments go to parseComment(),
    code fragments to checkCodeForMacro().

    Returns the printErrors() result (the callers use it as error count).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if '/' in sLine:
            cchLine = len(sLine);
            offLine = 0;
            while offLine < cchLine:
                if self.iState == self.kiCode:
                    # Only multi-line comments for now.
                    offHit = sLine.find('/*', offLine);
                    if offHit < 0:
                        # The rest of the line is code.
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = cchLine;
                    else:
                        # Code up to the comment start, then switch state.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit < 0:
                        # The rest of the line is comment text.
                        self.sComment += sLine[offLine:];
                        offLine = cchLine;
                    else:
                        # The comment is complete, back to code.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        elif self.iState == self.kiCode:
            # No slash, but check code line for relevant macro.
            if sLine.find('IEMOP_') >= 0:
                self.checkCodeForMacro(sLine);
            # If the line is a '}' in the first position, complete the instructions.
            elif sLine[0] == '}':
                self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2539
2540
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns what SimpleParser.parse() returns for the file (the caller adds
    it up as an error count).  Raises Exception if the file cannot be
    opened or read.  A ParserException is printed before it is passed on.
    """
    #
    # Slurp the whole file into a line array.  The with statement takes
    # care of closing the file whether the reading works out or not.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Only parser exceptions are intercepted, and merely to
    # get them printed before passing them on.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2571
2572
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the directory this script lives in.  Exits
    the process with status 1 if any errors were counted, otherwise returns
    True.  Exceptions from the file parsing are not caught here.
    """
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    # The inputs as (default instruction map, file name) pairs.
    atInputs = [
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];
    cErrors = sum(__parseFileByName(os.path.join(sScriptDir, sName), sDefaultMap) for sDefaultMap, sName in atInputs);

    if cErrors != 0:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
2591
2592
2593
# Module level statement: the instruction files get parsed whenever this
# file is executed or imported, and __parseAll() ends the process with exit
# status 1 when parse errors were counted.
# NOTE(review): presumably this is what fills g_dInstructionMaps for the
#               generators below - confirm against the parser code.
__parseAll();
2595
2596
2597#
2598# Generators (may perhaps move later).
2599#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Writes disassembler opcode table source (C 'DISOPCODE' arrays) to a file.

    The instruction maps in g_dInstructionMaps are ordered by their encoding
    followed by their concatenated lead opcodes.  A table consists of a header
    comment, the array definition with one OP()/OPVEX() line per plain
    instruction entry, and a closing AssertCompile on the table size.

    oDstFile is the file-like object receiving the text (only write() is
    used); it defaults to sys.stdout.

    Note! Only the first map in the sort order is emitted for now, because the
          map loop is left right after the first table has been written.
    """

    # Start offsets of the output columns within a table row.
    kaoffColumns = (4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199)

    # Encodings that are handled by one fixed decoder column.
    kdVexParsers = {
        'vex2': 'IDX_ParseVex2b,',
        'vex3': 'IDX_ParseVex3b,',
    }

    aoSortedMaps = sorted(g_dInstructionMaps.values(),
                          key = lambda oCandidate: oCandidate.sEncoding + ''.join(oCandidate.asLeadOpcodes))
    for oMap in aoSortedMaps:
        # NOTE(review): whitespace inside the string literals below is kept as
        #               it appears in the reviewed copy of this file.
        asOutput = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % (oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes)),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ]

        for iSlot, oSlot in enumerate(oMap.getInstructionsInTableOrder()):
            # Every 16 entries: blank separator (except at the top) and a
            # comment giving the high nibble of the entry index.
            if iSlot % 16 == 0:
                if iSlot:
                    asOutput.append('')
                asOutput.append(' /* %x */' % (iSlot >> 4,))

            # Empty slots produce no row at all.
            if oSlot is None:
                continue

            # Slots holding a list of instructions only get a placeholder.
            if isinstance(oSlot, list):
                asOutput.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iSlot,))
                continue

            aoOperands = oSlot.aoOperands
            cOperands  = len(aoOperands)
            if cOperands > 3:
                sMacro, cMaxOperands = 'OPVEX', 4
            else:
                sMacro, cMaxOperands = 'OP', 3
            assert cOperands <= cMaxOperands

            #
            # Column 0: the macro name and the format string.  Operand formats
            # not starting with '%' are upper-cased (@todo remove upper() later).
            #
            sFormat = '%s("%s' % (sMacro, oSlot.sMnemonic,)
            asOperandFormats = []
            for oOperand in aoOperands:
                sOperandFormat = g_kdOpTypes[oOperand.sType][2]
                if sOperandFormat[0] == '%':
                    asOperandFormats.append(sOperandFormat)
                else:
                    asOperandFormats.append(sOperandFormat.upper())
            if asOperandFormats:
                sFormat += ' ' + ','.join(asOperandFormats)
            sFormat += '",'

            #
            # The decoder columns, selected by the instruction encoding.
            #
            sEncoding = oSlot.sEncoding
            asParsers = []
            if sEncoding is None or sEncoding == 'fixed':
                pass
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding.
                assert cOperands >= 1 and aoOperands[0].usesModRM()
                asParsers.append('IDX_ParseModRM,')
                ## @todo IDX_ParseVexDest
                # The second operand may be using ModR/M as well.
                if cOperands > 1 and aoOperands[1].usesModRM():
                    asParsers.append('IDX_UseModRM,')
            elif sEncoding == 'prefix':
                asParsers.extend(['0,'] * cOperands)
            elif sEncoding in kdVexParsers:
                asParsers.append(kdVexParsers[sEncoding])
            elif sEncoding in g_dInstructionMaps:
                asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',')
            else:
                ## @todo Decoders not produced yet:
                #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3,
                #   IDX_ParseGrp4, IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7,
                #   IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13,
                #   IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16, IDX_ParseThreeByteEsc4,
                #   IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
                #   IDX_ParseNopPause, IDX_ParseInvOpModRM.
                assert False, str(oSlot)

            # Operands not covered by a decoder column above contribute their
            # own index (immediates and such), IDX_UseModRM ones excepted.
            aoRemaining = aoOperands[len(asParsers):]
            for oOperand in aoRemaining:
                sIdx = g_kdOpTypes[oOperand.sType][0]
                if sIdx != 'IDX_UseModRM':
                    asParsers.append(sIdx + ',')
            asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)))

            #
            # The opcode enum value and the operand parameter columns.
            #
            assert oSlot.sDisEnum, str(oSlot)
            sOpcode  = oSlot.sDisEnum + ','
            asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands]
            asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)))

            #
            # The flags column: the DISOPTYPE_ defines of the hints in sorted
            # hint order, or zero if there are none.  Closes the macro call.
            #
            asFlags = []
            for sHint in sorted(oSlot.dHints.keys()):
                sDefine = g_kdHints[sHint]
                if sDefine.startswith('DISOPTYPE_'):
                    asFlags.append(sDefine)
            sFlags = (' | '.join(asFlags) or '0') + '),'

            #
            # Join the columns, padding each out to its start offset, or by a
            # single space when the line is already at or beyond that offset.
            #
            # Example of a resulting row and the macro consuming it:
            #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE, DISOPTYPE_HARMLESS),
            #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            #
            asColumns = [sFormat] + asParsers + [sOpcode] + asParams + [sFlags]
            sRow = ''
            for iColumn, sColumn in enumerate(asColumns):
                cchPadding = kaoffColumns[iColumn] - len(sRow)
                sRow += ' ' * max(cchPadding, 1) + sColumn
            asOutput.append(sRow)

        asOutput.append('};')
        asOutput.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),))

        #
        # Write out the table.
        #
        oDstFile.write('\n'.join(asOutput))
        oDstFile.write('\n')
        break   # for now
2760
if __name__ == '__main__':
    # Run as a script: generate the tables with the default destination.
    generateDisassemblerTables()
2763
2764
2765
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette