VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65989

Last change on this file since 65989 was 65962, checked in by vboxsync, 8 years ago

IEMAllInstructionsPython.py: pylint 2.0.0 fixes

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 112.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 65962 2017-03-07 10:13:17Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 65962 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: the 'long' builtin does not exist there, so alias
# it to 'int' for code written against Python 2.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their EFLAGS bit mask values.
## Single-bit flags are spelled as shifts (1 << bit number).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,
    'X86_EFL_RA1_MASK':  1 << 1,
};
77
## Valid \@op[1-4] locations.
## Every location name maps to its own, initially empty, list.
g_kdOpLocations = dict((sWhere, []) for sWhere in (
    'reg',      # modrm.reg
    'rm',       # modrm.rm
    'imm',      # immediate instruction data
    'vvvv',     # VEX.vvvv
    # Fixed registers:
    'AL',
    'rAX',
    'rSI',
    'rDI',
    'rFLAGS',
    'CS',
    'DS',
    'ES',
    'FS',
    'GS',
    'SS',
));
98
## Valid \@op[1-4] types.
##
## Each value is a 4-tuple:
##      [0] the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM),
##      [1] the operand location (a g_kdOpLocations key),
##      [2] the disassembler format string version of the type,
##      [3] the disassembler OP_PARAM_XXX value (the XXX part only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses.
    'Ap':  ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':  ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':  ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':  ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),

    # ModR/M.rm - memory only.
    'Ma':  ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.

    # ModR/M.reg
    'Gb':  ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':  ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':  ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),

    # Immediate values.
    'Ib':  ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':  ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':  ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':  ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':  ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':  ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':  ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':  ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets.
    'Jb':  ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':  ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':  ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':  ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':  ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':  ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register.
    'Fv':  ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':  ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX': ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':  ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':  ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':  ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':  ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':  ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':  ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
160
161# IDX_ParseFixedReg
162# IDX_ParseVexDest
163
164
## IEMFORM_XXX mappings.
## Each form maps to ( sEncoding, [ sWhere1, ... ] ); the location list is
## None for the 'FIXED' form.
g_kdIemForms = {
    'RM':     ( 'ModR/M',     [ 'reg', 'rm' ],          ),
    'RM_REG': ( 'ModR/M',     [ 'reg', 'rm' ],          ),
    'RM_MEM': ( 'ModR/M',     [ 'reg', 'rm' ],          ),
    'MR':     ( 'ModR/M',     [ 'rm', 'reg' ],          ),
    'MR_REG': ( 'ModR/M',     [ 'rm', 'reg' ],          ),
    'MR_MEM': ( 'ModR/M',     [ 'rm', 'reg' ],          ),
    'M':      ( 'ModR/M',     [ 'rm', ],                ),
    'M_REG':  ( 'ModR/M',     [ 'rm', ],                ),
    'M_MEM':  ( 'ModR/M',     [ 'rm', ],                ),
    'R':      ( 'ModR/M',     [ 'reg', ],               ),
    'RVM':    ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm' ],  ),
    'MVR':    ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg' ],  ),
    'FIXED':  ( 'fixed',      None,                     ),
};
181
## Valid \@oppfx values, each mapping to its own empty list.
g_kdPrefixes = dict((sPrefix, []) for sPrefix in ( '0x66', '0xf3', '0xf2', ));
188
## Special (non-hex-byte) \@opcode tag values, each mapping to its own empty list.
g_kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
198
## Valid values for \@openc.
## The first list entry is the matching BS3CG1ENC_XXX name, or None when there is none.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ],   ##< ModR/M
    'fixed':  [ 'BS3CG1ENC_FIXED', ],   ##< Fixed encoding (address, registers, etc).
    'prefix': [ None, ],                ##< Prefix
};
205
## Valid styles for \@opunused, \@opinvalid and \@opinvlstyle.
## Each style name maps to its own empty list.
g_kdInvalidStyles = dict((sStyle, []) for sStyle in (
    'immediate',                ##< CPU stops decoding immediately after the opcode.
    'intel-modrm',              ##< Intel decodes ModR/M.
    'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
));
214
## Known CPU names; every name maps to an empty tuple.
g_kdCpuNames = dict.fromkeys(( '8086', '80186', '80286', '80386', '80486', ), ());
222
## Valid \@opcpuid values mapped to the name of the CPUID feature bit constant.
g_kdCpuIdFlags = {
    # Standard feature leaf, EDX.
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    # Standard feature leaf, ECX.
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':   'X86_CPUID_FEATURE_ECX_DTES64',   # NOTE(review): name does not match the key - confirm against the x86 header.
    'monitor':  'X86_CPUID_FEATURE_ECX_CPLDS',    # NOTE(review): name does not match the key - confirm against the x86 header.
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_TM2',      # NOTE(review): name does not match the key - confirm against the x86 header.
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD feature bits.
    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
266
## Valid \@ophints values mapped to the DISOPTYPE_XXX name.
## The value is an empty string for hints without a DISOPTYPE_XXX name.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
};
300
301
302def _isValidOpcodeByte(sOpcode):
303 """
304 Checks if sOpcode is a valid lower case opcode byte.
305 Returns true/false.
306 """
307 if len(sOpcode) == 4:
308 if sOpcode[:2] == '0x':
309 if sOpcode[2] in '0123456789abcdef':
310 if sOpcode[3] in '0123456789abcdef':
311 return True;
312 return False;
313
314
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid map encodings.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty instruction map.

        sName           The map name (also drives getDisasTableName).
        asLeadOpcodes   Lead opcode bytes as lower case hex strings like '0x0f',
                        None (default) means no lead bytes.
        sSelector       A kdSelectors key.
        sEncoding       A kdEncodings key.
        sDisParse       None or a name starting with 'IDX_Parse'.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the map
        selector.  For the selectors that put a requirement on modrm.mod, the
        two mod bits (mask 0xc0) are asserted to be (or not to be) 0y11.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        # Note! modrm.mod == 0y11 means both bits of the 0xc0 mask are set, so
        #       the masked value must be compared against 0xc0 (not 0xc).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions (those without an opcode are skipped).
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of the map
        name: 'm' and 'r' as well as two digit lower case hex words are
        appended with a '_' prefix, other words are appended capitalized (with
        'grp' and 'map' spelled 'Grp' and 'Map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':     # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':   # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing rather than indexing, so empty words don't raise IndexError.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;
451
452
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes (in bytes, ascending).
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    @staticmethod
    def _hexDigits(iValue):
        """
        Returns the lower case hex digits of the non-negative integer iValue,
        without the '0x' prefix and without the 'L' suffix python 2 appends
        to long values.
        """
        sHex = hex(iValue);
        if sHex[-1] == 'L':
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        return sHex[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally prefixed by '+' or '-' (which forces sign extending).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        implementation always returns the first form.  The bytearray holds
        the value in little endian byte order, padded to the smallest size in
        acbSizes it fits in (or a multiple of the largest size if it does not
        fit in any of them).

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer (int() yields a long in python 2 when needed).
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string.  Negative values are manually converted to
        # two's complement: the digits of ~n == -n - 1 are inverted.
        if iValue >= 0:
            sHex = self._hexDigits(iValue);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._hexDigits(-iValue - 1)]);
            # The top bit must be set or the value would read back as positive
            # (e.g. -200 inverts to '38'), so add a sign digit when it isn't.
            if sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Figure out the natural number of digits, i.e. two per byte of the
        # smallest normal size the value fits in.
        cDigits        = len(sHex);
        cNaturalDigits = self.acbSizes[-1] * 2;
        if cDigits <= cNaturalDigits:
            for cb in self.acbSizes:
                if cDigits <= cb * 2:
                    cNaturalDigits = cb * 2;
                    break;
        else:
            # Too big for any normal size, round up to a multiple of the largest one.
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad with zeros or, for negative values, with sign digits.
        if cNaturalDigits > cDigits:
            sHex = ('0' if iValue >= 0 else 'f') * (cNaturalDigits - cDigits) + sHex;

        # Convert to bytearray (least significant byte first) and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.
        Always False for this base class.
        """
        _ = sValue;
        return False;
574
575
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of flag names which are looked up in
    SimpleParser.kdEFlags.
    """

    ## Flag names that isAndOrPair looks for in a value.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated flag list into byte representation(s).

        Constants prefixed by '!' in SimpleParser.kdEFlags are collected into
        a clear mask, the rest into a set mask.  Returns the TestType.get
        result for the set mask alone when nothing is to be cleared, otherwise
        the (AND entry, OR entry) pair where the AND entry is the inverted
        clear mask.

        Raises BadValue on unknown flag names.
        """
        # NOTE(review): the asserts below require isAndOrPair (substring match
        # on the kdZeroValueFlags names) to agree with the '!' prefixes in
        # SimpleParser.kdEFlags, which is not visible from here - verify.
        fToSet   = 0;
        fToClear = 0;
        for sFlag in sValue.split(','):
            sConstant = SimpleParser.kdEFlags.get(sFlag);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fToClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fToSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (~fToClear))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Returns True if any of the kdZeroValueFlags names occurs anywhere in
        sValue, otherwise False.
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
611
612
613
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '=',
    ];
    ## Types
    # NOTE(review): 'int' relies on the TestType default of fUnsigned = True,
    #               so it is configured exactly like 'uint' - confirm whether
    #               fUnsigned = False was intended.
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
    };
    ## CPU context fields.
    ## Format: name: ( default type, tbd, )
    kdFields = {
        # Operands (\@op1 thru \@op4).
        'op1': ( 'uint', '', ), 'op2': ( 'uint', '', ), 'op3': ( 'uint', '', ), 'op4': ( 'uint', '', ),
        # Flags.
        'efl': ( 'efl', '', ),
        # 8-bit GPRs.
        'al':   ( 'uint', '', ), 'cl':   ( 'uint', '', ), 'dl':   ( 'uint', '', ), 'bl':   ( 'uint', '', ),
        'ah':   ( 'uint', '', ), 'ch':   ( 'uint', '', ), 'dh':   ( 'uint', '', ), 'bh':   ( 'uint', '', ),
        'r8l':  ( 'uint', '', ), 'r9l':  ( 'uint', '', ), 'r10l': ( 'uint', '', ), 'r11l': ( 'uint', '', ),
        'r12l': ( 'uint', '', ), 'r13l': ( 'uint', '', ), 'r14l': ( 'uint', '', ), 'r15l': ( 'uint', '', ),
        # 16-bit GPRs.
        'ax':   ( 'uint', '', ), 'dx':   ( 'uint', '', ), 'cx':   ( 'uint', '', ), 'bx':   ( 'uint', '', ),
        'sp':   ( 'uint', '', ), 'bp':   ( 'uint', '', ), 'si':   ( 'uint', '', ), 'di':   ( 'uint', '', ),
        'r8w':  ( 'uint', '', ), 'r9w':  ( 'uint', '', ), 'r10w': ( 'uint', '', ), 'r11w': ( 'uint', '', ),
        'r12w': ( 'uint', '', ), 'r13w': ( 'uint', '', ), 'r14w': ( 'uint', '', ), 'r15w': ( 'uint', '', ),
        # 32-bit GPRs.
        'eax':  ( 'uint', '', ), 'edx':  ( 'uint', '', ), 'ecx':  ( 'uint', '', ), 'ebx':  ( 'uint', '', ),
        'esp':  ( 'uint', '', ), 'ebp':  ( 'uint', '', ), 'esi':  ( 'uint', '', ), 'edi':  ( 'uint', '', ),
        'r8d':  ( 'uint', '', ), 'r9d':  ( 'uint', '', ), 'r10d': ( 'uint', '', ), 'r11d': ( 'uint', '', ),
        'r12d': ( 'uint', '', ), 'r13d': ( 'uint', '', ), 'r14d': ( 'uint', '', ), 'r15d': ( 'uint', '', ),
        # 64-bit GPRs.
        'rax':  ( 'uint', '', ), 'rdx':  ( 'uint', '', ), 'rcx':  ( 'uint', '', ), 'rbx':  ( 'uint', '', ),
        'rsp':  ( 'uint', '', ), 'rbp':  ( 'uint', '', ), 'rsi':  ( 'uint', '', ), 'rdi':  ( 'uint', '', ),
        'r8':   ( 'uint', '', ), 'r9':   ( 'uint', '', ), 'r10':  ( 'uint', '', ), 'r11':  ( 'uint', '', ),
        'r12':  ( 'uint', '', ), 'r13':  ( 'uint', '', ), 'r14':  ( 'uint', '', ), 'r15':  ( 'uint', '', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax': ( 'uint', '', ), 'oz.rdx': ( 'uint', '', ), 'oz.rcx': ( 'uint', '', ), 'oz.rbx': ( 'uint', '', ),
        'oz.rsp': ( 'uint', '', ), 'oz.rbp': ( 'uint', '', ), 'oz.rsi': ( 'uint', '', ), 'oz.rdi': ( 'uint', '', ),
        'oz.r8':  ( 'uint', '', ), 'oz.r9':  ( 'uint', '', ), 'oz.r10': ( 'uint', '', ), 'oz.r11': ( 'uint', '', ),
        'oz.r12': ( 'uint', '', ), 'oz.r13': ( 'uint', '', ), 'oz.r14': ( 'uint', '', ), 'oz.r15': ( 'uint', '', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one modifier.  sField must be a kdFields key and sOp one of
        the kasOperators (both asserted); sValue and sType are stored as given.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField, self.sOp    = sField, sOp;
        self.sValue, self.sType  = sValue, sType;
739
740
class TestSelector(object):
    """
    One selector for an instruction test.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Records one 'variable op value' selector.  The variable, the compare
        operator and the value are asserted to be valid per kdVariables and
        kasCompareOps.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
817
818
class InstructionTest(object):
    """
    Instruction test.

    Ties an instruction to the (initially empty) lists of inputs, outputs and
    selectors making up one test.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction)
        self.oInstr      = oInstr;  # type: Instruction
        self.aoInputs    = list();
        self.aoOutputs   = list();
        self.aoSelectors = list();  # type: list(TestSelector)
829
830
class Operand(object):
    """
    Instruction operand.

    Pairs a location (g_kdOpLocations key) with a type (g_kdOpTypes key);
    both are asserted to be valid on construction.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with 'E', 'G' or 'M'. """
        chKind = self.sType[0];
        return chKind in ( 'E', 'G', 'M', );
845
846
847
848class Instruction(object): # pylint: disable=too-many-instance-attributes
849 """
850 Instruction.
851 """
852
853 def __init__(self, sSrcFile, iLine):
854 ## @name Core attributes.
855 ## @{
856 self.sMnemonic = None;
857 self.sBrief = None;
858 self.asDescSections = []; # type: list(str)
859 self.aoMaps = []; # type: list(InstructionMap)
860 self.aoOperands = []; # type: list(Operand)
861 self.sPrefix = None; ##< Single prefix: None, 0x66, 0xf3, 0xf2
862 self.sOpcode = None; # type: str
863 self.sEncoding = None;
864 self.asFlTest = None;
865 self.asFlModify = None;
866 self.asFlUndefined = None;
867 self.asFlSet = None;
868 self.asFlClear = None;
869 self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
870 self.sDisEnum = None; ##< OP_XXXX value. Default is based on the uppercased mnemonic.
871 self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
872 self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction.
873 self.aoTests = []; # type: list(InstructionTest)
874 self.sMinCpu = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
875 self.oCpuExpr = None; ##< Some CPU restriction expression...
876 self.sGroup = None;
877 self.fUnused = False; ##< Unused instruction.
878 self.fInvalid = False; ##< Invalid instruction (like UD2).
879 self.sInvalidStyle = None; ##< Invalid behviour style
880 ## @}
881
882 ## @name Implementation attributes.
883 ## @{
884 self.sStats = None;
885 self.sFunction = None;
886 self.fStub = False;
887 self.fUdStub = False;
888 ## @}
889
890 ## @name Decoding info
891 ## @{
892 self.sSrcFile = sSrcFile;
893 self.iLineCreated = iLine;
894 self.iLineCompleted = None;
895 self.cOpTags = 0;
896 self.iLineFnIemOpMacro = -1;
897 self.iLineMnemonicMacro = -1;
898 ## @}
899
900 ## @name Intermediate input fields.
901 ## @{
902 self.sRawDisOpNo = None;
903 self.asRawDisParams = [];
904 self.sRawIemOpFlags = None;
905 self.sRawOldOpcodes = None;
906 ## @}
907
908 def toString(self, fRepr = False):
909 """ Turn object into a string. """
910 aasFields = [];
911
912 aasFields.append(['opcode', self.sOpcode]);
913 aasFields.append(['mnemonic', self.sMnemonic]);
914 for iOperand, oOperand in enumerate(self.aoOperands):
915 aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
916 if self.aoMaps: aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
917 aasFields.append(['encoding', self.sEncoding]);
918 if self.dHints: aasFields.append(['hints', ','.join(self.dHints.keys())]);
919 aasFields.append(['disenum', self.sDisEnum]);
920 if self.asCpuIds: aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
921 aasFields.append(['group', self.sGroup]);
922 if self.fUnused: aasFields.append(['unused', 'True']);
923 if self.fInvalid: aasFields.append(['invalid', 'True']);
924 aasFields.append(['invlstyle', self.sInvalidStyle]);
925 aasFields.append(['fltest', self.asFlTest]);
926 aasFields.append(['flmodify', self.asFlModify]);
927 aasFields.append(['flundef', self.asFlUndefined]);
928 aasFields.append(['flset', self.asFlSet]);
929 aasFields.append(['flclear', self.asFlClear]);
930 aasFields.append(['mincpu', self.sMinCpu]);
931 aasFields.append(['stats', self.sStats]);
932 aasFields.append(['sFunction', self.sFunction]);
933 if self.fStub: aasFields.append(['fStub', 'True']);
934 if self.fUdStub: aasFields.append(['fUdStub', 'True']);
935 if self.cOpTags: aasFields.append(['optags', str(self.cOpTags)]);
936 if self.iLineFnIemOpMacro != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
937 if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);
938
939 sRet = '<' if fRepr else '';
940 for sField, sValue in aasFields:
941 if sValue != None:
942 if len(sRet) > 1:
943 sRet += '; ';
944 sRet += '%s=%s' % (sField, sValue,);
945 if fRepr:
946 sRet += '>';
947
948 return sRet;
949
950 def __str__(self):
951 """ Provide string represenation. """
952 return self.toString(False);
953
def __repr__(self):
    """ Provides the unambiguous string representation (toString with the repr brackets). """
    sText = self.toString(True)
    return sText
957
def getOpcodeByte(self):
    """
    Decodes sOpcode into a byte range integer value.

    Two forms are supported: the full hex byte form ('0x??'), which is
    returned as is, and the '/<digit>' form, which is returned shifted
    left by three bits.

    Raises Exception if sOpcode is None, empty or otherwise unsupported.
    """
    if self.sOpcode is None:
        raise Exception('No opcode byte for %s!' % (self,));
    sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

    # Full hex byte form.
    if sOpcode[:2] == '0x':
        return int(sOpcode, 16);

    # The /r form.  The length is checked first so short strings like ''
    # and '/' hit the exception below rather than an IndexError.
    if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
        return int(sOpcode[1:]) << 3;

    raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));
976
977
978
## All the instructions.
g_aoAllInstructions = []; # type: List[Instruction]

## Instruction maps, keyed by map name.
## Group maps that share one lead opcode sequence are told apart by their
## sSelector value.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Note: grp1_80 is selected by /r just like the other group 1 maps.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1041
1042
1043
class ParserException(Exception):
    """ Exception raised by the parser; the message is the complete error text. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
1048
1049
1050class SimpleParser(object):
1051 """
1052 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1053 """
1054
1055 ## @name Parser state.
1056 ## @{
1057 kiCode = 0;
1058 kiCommentMulti = 1;
1059 ## @}
1060
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the name used as prefix in error messages, asLines is
    stored as the input, and sDefaultMap must be a key in
    g_dInstructionMaps (asserted).
    """
    # Input.
    self.sSrcFile       = sSrcFile
    self.asLines        = asLines

    # Parsing state.
    self.iLine          = 0
    self.iState         = self.kiCode
    self.sComment       = ''
    self.iCommentLine   = 0
    self.aoCurInstrs    = []

    # The map used by doneInstructionOne when an instruction names none.
    assert sDefaultMap in g_dInstructionMaps
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap]

    # Statistics.
    self.cTotalInstr    = 0
    self.cTotalStubs    = 0
    self.cTotalTagged   = 0

    # Value validation patterns.
    sReIdentifier = '^[A-Za-z_][A-Za-z0-9_]*$'
    self.oReMacroName   = re.compile(sReIdentifier)
    self.oReMnemonic    = re.compile(sReIdentifier)
    self.oReStatsName   = re.compile(sReIdentifier)
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$')
    self.oReGroupName   = re.compile('^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$')
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$')
    self.fDebug         = True

    # Tag name to handler method.  Several tags share one handler, which
    # tells them apart by the sTag argument.
    fnOperand       = self.parseTagOpOperandN
    fnEFlags        = self.parseTagOpEFlags
    fnUnusedInvalid = self.parseTagOpUnusedInvalid
    self.dTagHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         fnOperand,
        '@op2':         fnOperand,
        '@op3':         fnOperand,
        '@op4':         fnOperand,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    fnEFlags,
        '@opflmodify':  fnEFlags,
        '@opflundef':   fnEFlags,
        '@opflset':     fnEFlags,
        '@opflclear':   fnEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    fnUnusedInvalid,
        '@opinvalid':   fnUnusedInvalid,
        '@opinvlstyle': fnUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    }

    # Error messages collected by error() and errorComment().
    self.asErrors       = []
1117
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file
    name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)
    raise ParserException(sFullMessage)
1123
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine   = self.iCommentLine + iLineInComment
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,)
    raise ParserException(sFullMessage)
1129
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Always returns False, so callers can do 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1137
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment, the line
    number being self.iCommentLine + iLineInComment.

    Always returns False, so callers can do 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1145
def printErrors(self):
    """
    Writes all the collected error messages to stderr.

    Returns the number of errors.
    """
    cErrors = len(self.asErrors)
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors))
    return cErrors
1154
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return
    print('debug: %s' % (sMessage,))
1161
1162
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and appends it to both the global
    instruction list and the list of current instructions.

    The instruction gets line number iLine, or self.iLine when iLine is
    None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine
    oNewInstr = Instruction(self.sSrcFile, iLine)
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oNewInstr)
    return oNewInstr
1171
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an underscore separated
    string like the IEM stats base name.

    Nothing is done if the instruction already has a mnemonic.  Otherwise
    the lower-cased first word becomes the mnemonic and, if there are no
    operands yet, each following word is looked up in g_kdOpTypes and
    appended as an operand.

    Returns False when hitting a word not in g_kdOpTypes (operands added
    before it are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True

    asParts = sStats.split('_')
    oInstr.sMnemonic = asParts[0].lower()

    asTypes = asParts[1:]
    if not asTypes or oInstr.aoOperands:
        return True

    for sOpType in asTypes:
        if sOpType not in g_kdOpTypes:
            return False
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType))
    return True
1187
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction: records iLine as the
    completion line, fills in defaults for the attributes that were not
    given, and adds the instruction to its maps.

    Always returns True.
    """
    assert oInstr.iLineCompleted is None
    oInstr.iLineCompleted = iLine

    # Note: Neither the specified instructions (cOpTags > 0) nor the
    #       unspecified legacy ones get any special treatment here yet.

    def typeSuffix():
        """ Returns '_<type>' for each operand with a type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType])

    #
    # Common defaults.
    #

    # Guess the mnemonic and operands from the stats name, falling back
    # on the function name without its 'iemOp_' part.
    if oInstr.sMnemonic is None:
        sBaseName = oInstr.sStats
        if sBaseName is None and oInstr.sFunction is not None:
            sBaseName = oInstr.sFunction.replace('iemOp_', '')
        if sBaseName is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sBaseName)

    # The disassembler op enum constant defaults to the upper-cased mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper()

    # The statistics base name defaults to the function name, or else to
    # the mnemonic plus operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '')
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + typeSuffix()

    # The function name defaults to the mnemonic plus operand types, or
    # else to the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + typeSuffix()
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats

    # The encoding is derived from the operands.  It is left unset when
    # the first operand does not use ModR/M.
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            oInstr.sEncoding = 'fixed'
        elif oInstr.aoOperands[0].usesModRM():
            fVex = len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv'
            oInstr.sEncoding = 'ModR/M+VEX' if fVex else 'ModR/M'

    #
    # Apply the default map and add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ]
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr)

    return True
1268
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions, updates the instruction and
    stub totals, and resets the current comment and instruction list.

    The completion line is self.iLine, or self.iCommentLine +
    iLineInComment when the latter is given.  Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine
    else:
        iLineDone = self.iCommentLine + iLineInComment

    for oCurInstr in self.aoCurInstrs:
        self.doneInstructionOne(oCurInstr, iLineDone)
        if oCurInstr.fStub:
            self.cTotalStubs += 1
    self.cTotalInstr += len(self.aoCurInstrs)

    # Start afresh.
    self.sComment    = ''
    self.aoCurInstrs = []
    return True
1283
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all the current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None
    are replaced (any other value, empty strings included, is kept).
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue)
1295
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue, padding the array with None as needed.

    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib)
        cMissing  = iEntry + 1 - len(aoEntries)
        if cMissing > 0:
            aoEntries.extend([None] * cMissing)
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue
1307
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line starts with the word 'Opcode' followed by a word
    with a '0x' or '0X' prefix, the words following 'Opcode' are joined
    by single spaces (any '0X' prefix replaced by '0x') and stored as
    sRawOldOpcodes on the current instructions that do not have it set.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and (   asWords[1].startswith('0x')
           or asWords[1].startswith('0X')):
        # Drop the 'Opcode' word, keeping the opcode bytes and what follows.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
1323
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being
    parsed, adding one at line self.iCommentLine + iTagLine if there is
    none.

    Increments the tag count of every current instruction, counting an
    instruction in cTotalTagged on its first tag.  Returns the last
    current instruction.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine)

    for oCurInstr in self.aoCurInstrs:
        cTagsBefore = oCurInstr.cOpTags
        oCurInstr.cOpTags = cTagsBefore + 1
        if cTagsBefore == 0:
            self.cTotalTagged += 1

    return self.aoCurInstrs[-1]
1333
@staticmethod
def flattenSections(aasSections):
    """
    Flattens each multiline section into a single string by stripping
    the lines and joining them with a space.  Empty sections are skipped.

    Returns a list of strings, one per non-empty section.
    """
    return [' '.join([sLine.strip() for sLine in asSection])
            for asSection in aasSections if asSection]
1345
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens the sections into a single string: the stripped lines of a
    section are joined by sLineSep, and each non-empty section other
    than the very first one is preceded by sSectionSep.

    There is no trailing section separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip()

    asPieces = []
    for iSection, asSection in enumerate(aasSections):
        if not asSection:
            continue
        if iSection > 0:
            asPieces.append(sSectionSep)
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asSection]))
    return ''.join(asPieces)
1363
1364
1365
1366 ## @name Tag parsers
1367 ## @{
1368
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opbrief
    Value:      Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing full stop is added when missing.  An already set brief is
    not overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first full stop that is followed by a space or ends the
    # string.  It must be the final character, or there is more than
    # one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1398
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdesc
    Value:      Text description, multiple sections, appended.

    Describes the instruction.  The flattened sections are appended to
    the instruction's asDescSections.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    if aasSections:
        asFlattened = self.flattenSections(aasSections)
        oInstr.asDescSections.extend(asFlattened)

    _ = (sTag, iEndLine)
    return True
1413
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmnemonic
    Value:      mnemonic

    The 'mnemonic' value must be a valid C identifier string (it is
    matched against self.oReMnemonic).  Because of prefixes, groups and
    whatnot, there are times when the mnemonic isn't that of an actual
    assembler mnemonic.

    Returns True on success, False after recording an error (invalid
    value, or the mnemonic is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections)
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,))

    # Refuse to overwrite an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,))
    oInstr.sMnemonic = sNewMnemonic

    _ = iEndLine
    return True
1436
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@op1, \@op2, \@op3, \@op4
    Value:      [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.
    When omitted, it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be
    a key in g_kdOpTypes.

    The operand index is taken from the final digit of the tag.  Returns
    True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1486
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each name must be a key in g_dInstructionMaps and must not already
    be assigned to the instruction.  Returns True on success, False
    after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Surrounding whitespace
    # and empty entries are dropped so that an empty value is reported as
    # such rather than as an invalid map named ''.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',') if sMap.strip()];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Duplicate within the value itself: reported, but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1521
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oppfx
    Value:      none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    A two character value gets '0x' prepended.  The value 'none' is
    stored as None.  Returns True on success, False after recording an
    error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # Reject what is NOT a valid opcode byte.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1557
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcode
    Value:      0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value must either be a key in g_kdSpecialOpcodes or be accepted
    by _isValidOpcodeByte.  Returns True on success, False after
    recording an error (invalid value, or the opcode is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sNewOpcode = self.flattenAllSections(aasSections)
    if sNewOpcode not in g_kdSpecialOpcodes and not _isValidOpcodeByte(sNewOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sNewOpcode,))

    # Refuse to overwrite an existing opcode.
    sOldOpcode = oInstr.sOpcode
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sNewOpcode,))
    oInstr.sOpcode = sNewOpcode

    _ = iEndLine
    return True
1581
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings or
    g_dInstructionMaps, or if _isValidOpcodeByte accepts it.  Returns
    True on success, False after recording an error (invalid value, or
    the encoding is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections)
    fKnown = sNewEncoding in g_kdEncodings or sNewEncoding in g_dInstructionMaps
    if not fKnown and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,))

    # Refuse to overwrite an existing encoding.
    sOldEncoding = oInstr.sEncoding
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,))
    oInstr.sEncoding = sNewEncoding

    _ = iEndLine
    return True
1608
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## A value starting with '!' stands for the flag being clear.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'po':  '!X86_EFL_PF',   ##< Parity Odd (PF is clear on odd parity).
    'pe':   'X86_EFL_PF',   ##< Parity Even (PF is set on even parity).

    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation:
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
1654
## Maps each EFlags tag to the name of the Instruction attribute holding its flag list.
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
])
1663
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of self.kdEFlags keys, or 'none'
    for an empty list.  The list is stored in the instruction attribute
    that self.kdOpFlagToAttr gives for the tag, unless already set.

    Returns True on success, False after recording one or more errors.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the values.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    asFlags = sJoined.split(',')
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = []
    else:
        # All the invalid values are reported before bailing out.
        fAllValid = True
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in self.kdEFlags:
                continue
            sStripped = sFlag.strip()
            if sStripped in self.kdEFlags:
                asFlags[iFlag] = sStripped
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,))
        if not fAllValid:
            return False

    # Set them, refusing to overwrite an existing list.
    sAttrib = self.kdOpFlagToAttr[sTag]
    asOld = getattr(oInstr, sAttrib)
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,))
    setattr(oInstr, sAttrib, asFlags)

    _ = iEndLine
    return True
1695
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each value must be a key in g_kdHints; 'none' gives an empty list.
    The hints are added to the instruction's dHints dictionary.
    Duplicates are reported but are not fatal.

    Returns True on success, False if any value was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Update the list entry; strings cannot be assigned into.
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1729
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.  The value must match self.oReDisEnum.

    Extra words are reported as an error, after which the first word is
    still validated.  Returns True if the value was set, False on a
    missing or invalid value or when the enum is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split()
    cWords = len(asWords)
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,))
    if cWords == 0:
        return False

    sNewDisEnum = asWords[0]
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern))

    # Refuse to overwrite an existing value.
    sOldDisEnum = oInstr.sDisEnum
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,))
    oInstr.sDisEnum = sNewDisEnum

    _ = iEndLine
    return True
1758
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key in g_kdCpuNames.  Extra words are reported as
    an error, after which the first word is used.  An already set
    different value is reported and kept.

    Returns False on a missing or invalid name, otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The attribute must not be touched before this check, or
    # an attempt to overwrite can never be detected.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
1788
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys;
    'none' gives an empty list.  The values are appended to the
    instruction's asCpuIds.  Duplicates are reported but are not fatal.

    Returns True on success, False if any value was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Update the list entry; strings cannot be assigned into.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1822
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be a single word matching self.oReGroupName, and the
    instruction must not have a group yet.

    Returns True when the group was recorded, otherwise the result of
    errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    asWords = self.flattenAllSections(aasSections).split();

    # Work out what, if anything, is wrong with the value.
    sError = None;
    if len(asWords) != 1:
        sError = '%s: exactly one group, please: %s' % (sTag, asWords,);
    elif self.oReGroupName.match(asWords[0]) is None:
        sError = '%s: invalid group name: %s (valid: %s)' % (sTag, asWords[0], self.oReGroupName.pattern);
    elif oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, asWords[0],);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    # All fine, record it.
    oInstr.sGroup = asWords[0];
    return True;
1848
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    \@opunused says the specification is for a currently unused instruction
    encoding, \@opinvalid that it is for a currently invalid one (like UD2).
    \@opinvlstyle merely states how CPUs decode the instruction when it is
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be a single word found in g_kdInvalidStyles, and only one
    of the three tags may be used per instruction.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    asWords = self.flattenAllSections(aasSections).split();

    # Work out what, if anything, is wrong with the value.
    sError = None;
    if len(asWords) != 1:
        sError = '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,);
    elif asWords[0] not in g_kdInvalidStyles:
        sError = '%s: invalid invalid behviour style: %s (valid: %s)' % (sTag, asWords[0], g_kdInvalidStyles.keys(),);
    elif oInstr.sInvlStyle is not None:
        sError = '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)' \
               % ( sTag, oInstr.sInvlStyle, asWords[0],);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    # Record the style; two of the tags additionally flag the instruction.
    oInstr.sInvlStyle = asWords[0];
    sFlagAttrib = {'@opunused': 'fUnused', '@opinvalid': 'fInvalid'}.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
1886
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value describes one test.  A test that parses
    cleanly is appended to the instruction's aoTests list; every problem is
    reported via errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # Walk the words back to front: outputs come last, selectors first.
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list is tracked by
            # identity; comparing by value would treat two still empty
            # lists as the same one and miss repeated/misplaced separators.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand for a full 'variable<op>value' expression.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # The type is optional ('value:type'), the field supplies the default.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    # And-or pair values are only accepted together with the '&|=' operator.
                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                        oItem = TestInOut(sField, sOp, sValue, sType);
                                    else:
                                        self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                    % ( sTag, sDesc, sItem, ));
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                                % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                             % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                         % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;  # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test if everything parsed, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2026
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation following the description, or it
    is generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation isn't following immediately or which aren't implemented
    yet.

    Returns True when the name was recorded, otherwise the result of
    errorComment() (value not matching self.oReFunctionName, or a name is
    already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    sName  = self.flattenAllSections(aasSections);

    # Work out what, if anything, is wrong with the value.
    sError = None;
    if self.oReFunctionName.match(sName) is None:
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sName, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sName,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sFunction = sName;
    return True;
2054
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation isn't following immediately or which aren't implemented
    yet.

    Returns True when the name was recorded, otherwise the result of
    errorComment() (value not matching self.oReStatsName, or a name is
    already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    sName  = self.flattenAllSections(aasSections);

    # Work out what, if anything, is wrong with the value.
    sError = None;
    if self.oReStatsName.match(sName) is None:
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sName, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sName,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sStats = sName;
    return True;
2082
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of doneInstructions(), or that of errorComment() if
    the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2095
2096 ## @}
2097
2098
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments mentioning neither 'Opcode' nor '@' are ignored.  Otherwise the
    text is split into lines with the comment decoration removed, an old
    style 'Opcode ...' first line is handed to parseCommentOldOpcode(), and
    each @tag found is dispatched to its handler in self.dTagHandlers
    together with the sections (paragraphs) of text belonging to it.

    Returns False if the comment was ignored, True if it was processed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split into lines without leading asterisks and spaces, then drop the
    # empty lines at both ends (keeping the comment start line in sync).
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];
    while asLines and asLines[0] == '':
        del asLines[0];
        self.iCommentLine += 1;
    while asLines and asLines[-1] == '':
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text before the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text: add to the current section, an empty line starts a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # A tag line: first process the tag collected so far...
        #
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # ... then start collecting the new one.  Anything following the
        # tag word on the same line becomes the first line of its value.
        #
        asTagAndRest = sLine.split(None, 1);
        sCurTag      = asTagAndRest[0].lower();
        asCurSection = asTagAndRest[1:];
        aasSections  = [asCurSection, ];
        iCurTagLine  = iLine;

    #
    # Process the final tag.
    #
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2195
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  Should the closing parenthesis
    not be part of it, the following source lines (self.asLines, starting at
    self.iLine) are appended until it is found.

    Returns a tuple: the first element is the offset following the macro
    invocation, the second a list of the macro arguments with the macro name
    as element zero.  An invalid macro name or running out of source lines
    gives the result of self.error() instead.
    """
    # The name is everything up to the opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # The arguments.  Commas and the closing parenthesis only delimit an
    # argument at nesting level one; deeper levels belong to the argument.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    while cNesting > 0:
        # Out of text?  Then continue on the next source line.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;

        chCur = sInvocation[offCur];
        if chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')'):
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
2237
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of the name is considered, and it counts as
    an invocation only if the next non-blank character is an opening
    parenthesis.

    Returns (len(sCode), None) if not found.  If found, returns the
    parseMacroInvocation result with the offset adjusted to be relative to
    the start of sCode.
    """
    offHit = sCode.find(sMacro);
    # Use startswith: there may be nothing at all after the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oResult = self.parseMacroInvocation(sCode[offHit:]);
        # parseMacroInvocation hands back the self.error() result rather
        # than a tuple when it fails; treat that as 'not found'.
        if isinstance(oResult, tuple):
            offAfter, asRet = oResult;
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2247
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx that drops the offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2253
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one which is invoked in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Generator, so no further lookups are made once there is a hit.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oAttempts if asArgs is not None), None);
2263
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked and merged into the last instruction of
    self.aoCurInstrs (one is added if there is none): mnemonic, operands,
    encoding, statistics name and hints.  Disagreements with what the
    instruction already holds are reported via self.error().

    sMacro is the macro name (used in messages), asOperands the list of
    a_OpN values; the other parameters are the macro arguments of the same
    name.  sAsm is currently unused.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        asFormLocations = g_kdIemForms[sForm][1];
        if asFormLocations is not None:
            for iOperand, sWhere in enumerate(asFormLocations):
                # The form may list more locations than there are operands;
                # report that rather than indexing beyond the operand list.
                if iOperand >= len(oInstr.aoOperands):
                    self.error('%s: a_Form %s requires %u operands, the instruction only has %u'
                               % (sMacro, sForm, len(asFormLocations), len(oInstr.aoOperands),));
                    break;
                if oInstr.aoOperands[iOperand].sWhere != sWhere:
                    self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                               % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint arguments are '|' separated lists of prefixed constants or '0'.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
2377
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived here from the lower case mnemonic and the operands
    before handing over to workerIemOpMnemonicEx:
        - without operands both are just the mnemonic,
        - otherwise 'mnemonic_op1_op2' and 'mnemonic op1,op2'.

    Returns the workerIemOpMnemonicEx result.
    """
    # Test for an empty operand list; comparing the list with 0 never matches.
    if not asOperands:
        return self.workerIemOpMnemonicEx(sMacro, sLower, sLower, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sLower + '_' + '_'.join(asOperands), sLower + ' ' + ','.join(asOperands),
                                      sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2387
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Two kinds of macros are picked up: the FNIEMOP_XXX decoder function
    definitions and stubs, and the IEMOP_MNEMONIC family.

    Returns True if a FNIEMOP_XXX macro was found and processed, otherwise
    False (also when IEMOP_MNEMONIC* macros were processed).
    """
    # A macro invocation needs a name followed by an opening parenthesis.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];
        sMacro    = asArgs[0];
        fStub     = sMacro.find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        # A stub has no function body following it, so the instructions are complete.
        if fStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<N>,]
    #                     a_fDisHints, a_fIemHints), with N = 0..4.
    # The operands occupy arguments 6 thru 5+N and are followed by the two hints.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iDisHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iDisHints], asArgs[iDisHints + 1], asArgs[6:iDisHints]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<N>,] a_fDisHints, a_fIemHints),
    # with N = 0..4.  The operands occupy arguments 4 thru 3+N and are followed by the two hints.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iDisHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iDisHints], asArgs[iDisHints + 1], asArgs[4:iDisHints]);

    return False;
2481
2482
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, switching between the code and
    multi-line comment states.  Code is passed to checkCodeForMacro(),
    completed comments to parseComment(), and a line starting with '}'
    completes the current instructions.

    Returns the result of printErrors() (the number of errors).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        # Code up to the comment start, then switch to comment state.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # Comment complete: back to code state and parse it.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    # Raise explicitly: an assert is stripped by -O and this
                    # loop would then never terminate.
                    raise AssertionError('unexpected parser state: %s' % (self.iState,));

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than sLine[0], so an empty line string is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2546
2547
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns what SimpleParser.parse() returns for the file.  Raises
    Exception if the file cannot be opened or read.  A ParserException is
    printed before it is passed on to the caller.
    """
    #
    # Read sSrcFile into a line array, making sure the file gets closed.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Any other exception simply propagates.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2578
2579
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files, which are expected to be
    found in the same directory as this script.

    Returns True if there were no errors.  Terminates the process with exit
    code 1 if any parse error was counted; exceptions from the file parsing
    are not caught.
    """
    # (default map, file name) pairs of the sources to process.
    atSources = [
        ( 'one',    'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f',  'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = sum(__parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap) for sDefaultMap, sName in atSources);
    if cErrors:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
2598
2599
2600
# Module-level call: the instruction sources are parsed whenever this file is
# loaded, i.e. also when it is merely imported as a module.
__parseAll();
2602
2603
2604#
2605# Generators (may perhaps move later).
2606#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Iterates g_dInstructionMaps ordered by encoding followed by the joined
    lead opcode bytes and renders a 'const DISOPCODE <table>[] = { ... };'
    array plus an AssertCompile() on its element count.  The outer loop
    currently stops after the first map ('for now').

    Each entry returned by oMap.getInstructionsInTableOrder() is handled
    like this:
        - None:         nothing is emitted for the slot.
        - list:         a placeholder line is emitted.
        - anything else is treated as an instruction and becomes one OP(...)
          row, or OPVEX(...) when it has more than three operands.

    oDstFile is the object the text is written to using write().  The
    default is sys.stdout as it was when this function was defined.

    Raises AssertionError if an instruction has an encoding this generator
    does not know how to map to a parser index.  This is raised explicitly
    so it is not lost when running with assertions disabled (python -O).
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Start offset of each output column; column i of a row is padded out to aoffColumns[i].
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start every group of 16 entries with a comment giving the high nibble,
            # separated from the previous group by a blank line.
            # NOTE(review): the leading whitespace inside the ' /* ... */' literals may
            # have been collapsed in the copy this was taken from - confirm.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Empty slot: nothing is emitted.
            if oInstr is None:
                continue;

            # Several instructions share the slot: only a placeholder for now.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            # OP() takes three operands, OPVEX() four.
            sMacro = 'OP';
            cMaxOperands = 3;
            if len(oInstr.aoOperands) > 3:
                sMacro = 'OPVEX'
                cMaxOperands = 4;
            assert len(oInstr.aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            for iOperand, oOperand in enumerate(oInstr.aoOperands):
                sTmp += ' ' if iOperand == 0 else ',';
                sOpFmt = g_kdOpTypes[oOperand.sType][2];
                if sOpFmt[0] != '%':        ## @todo remove upper() later.
                    sTmp += sOpFmt.upper(); ## @todo remove upper() later.
                else:
                    sTmp += sOpFmt;
            sTmp += '",';
            asColumns = [ sTmp, ];

            #
            # Decoders.
            #
            iStart = len(asColumns);
            if oInstr.sEncoding is None:
                pass;
            elif oInstr.sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif oInstr.sEncoding in [ 'prefix', ]:
                # One '0,' parser entry per operand.
                asColumns.extend(['0,'] * len(oInstr.aoOperands));
            elif oInstr.sEncoding in [ 'fixed' ]:
                pass;
            elif oInstr.sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif oInstr.sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif oInstr.sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
            else:
                ## @todo
                #IDX_ParseTwoByteEsc,
                #IDX_ParseGrp1,
                #IDX_ParseShiftGrp2,
                #IDX_ParseGrp3,
                #IDX_ParseGrp4,
                #IDX_ParseGrp5,
                #IDX_Parse3DNow,
                #IDX_ParseGrp6,
                #IDX_ParseGrp7,
                #IDX_ParseGrp8,
                #IDX_ParseGrp9,
                #IDX_ParseGrp10,
                #IDX_ParseGrp12,
                #IDX_ParseGrp13,
                #IDX_ParseGrp14,
                #IDX_ParseGrp15,
                #IDX_ParseGrp16,
                #IDX_ParseThreeByteEsc4,
                #IDX_ParseThreeByteEsc5,
                #IDX_ParseModFence,
                #IDX_ParseEscFP,
                #IDX_ParseNopPause,
                #IDX_ParseInvOpModRM,
                # Raised explicitly rather than 'assert False' so an unknown encoding
                # is never silently turned into a table row when running with -O.
                raise AssertionError(str(oInstr));

            # Check for immediates and stuff in the remaining operands.
            # (The operands already covered by the decoder columns above are skipped.)
            for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            # Pad the parser columns up to the macro's operand count.
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            iStart = len(asColumns)
            for oOperand in oInstr.aoOperands:
                asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Flags.  Only hints mapping to DISOPTYPE_ defines are included; '0' if none.
            #
            asFlags = [];
            for sHint in sorted(oInstr.dHints.keys()):
                sDefine = g_kdHints[sHint];
                if sDefine.startswith('DISOPTYPE_'):
                    asFlags.append(sDefine);
            asColumns.append((' | '.join(asFlags) or '0') + '),');

            #
            # Format the columns into a line.  A column that would start before the
            # end of the previous one is separated from it by a single space instead.
            #
            sLine = '';
            for i, s in enumerate(asColumns):
                if len(sLine) < aoffColumns[i]:
                    sLine += ' ' * (aoffColumns[i] - len(sLine));
                else:
                    sLine += ' ';
                sLine += s;

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            # DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
2770
# When run as a script (rather than imported), generate the disassembler
# tables using the default destination, which is sys.stdout.
if __name__ == '__main__':
    generateDisassemblerTables();
2773
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette