VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66135

Last change on this file since 66135 was 66135, checked in by vboxsync, 8 years ago

IEM: Implemented AAA.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 120.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66135 2017-03-16 15:53:06Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66135 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes don't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias it to 'int'.  This keeps the
# long(...) conversions further down in this module working on both major
# Python versions.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## Maps X86_EFL_XXX constant names to their numeric values.
## The trailing comments give the bit expression each value corresponds to.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to a g_kdX86EFlagsConstants key.  A leading
## '!' on the value means the mnemonic denotes the flag being clear rather
## than set (this is how TestTypeEflags.get interprets it).
##
## NOTE(review): classic debugger notation uses 'pe' (parity even) for PF set
##               and 'po' (parity odd) for PF clear, which looks like the
##               opposite of the 'po'/'pe' entries below.  Confirm against the
##               tests using these mnemonics before changing anything, as
##               TestTypeEflags.kdZeroValueFlags lists 'pe' as a clear flag.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## \@op[1-4] locations
##
## Only the keys matter here (Operand.__init__ asserts membership); the list
## values are empty placeholders.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
145
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
##
## Every location used in field 1 below is a key of g_kdOpLocations.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':   ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':   ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':   ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':   ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),

    # ModR/M.rm - memory only.
    'Ma':   ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.

    # ModR/M.reg
    'Gb':   ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':   ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':   ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),

    # Immediate values.
    'Ib':   ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':   ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':   ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':   ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':   ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':   ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':   ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':   ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':   ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':   ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':   ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':   ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':   ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':   ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register.
    'Fv':   ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':   ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':  ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':   ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':   ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':   ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':   ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':   ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':   ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
207
208# IDX_ParseFixedReg
209# IDX_ParseVexDest
210
211
## IEMFORM_XXX mappings.
##
## Maps the form name (the XXX part) to a tuple holding the encoding name and
## the list of operand locations in operand order.  The location list is None
## for the FIXED form.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':        ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':    ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M',     [ 'rm', ], ),
    'R':        ( 'ModR/M',     [ 'reg', ], ),
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ),
    'FIXED':    ( 'fixed',      None, )
};
228
## \@oppfx values.
## Keys are lower case hex prefix bytes; the list values are empty placeholders.
g_kdPrefixes = {
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
235
## Special \@opcode tag values.
## These are the non-hex-byte opcode forms; the list values are empty placeholders.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
245
## Valid values for \@openc
##
## The first list entry is a BS3CG1ENC_XXX constant name, or None for 'prefix'.
##
## NOTE(review): g_kdIemForms uses the encoding name 'ModR/M+VEX' for the RVM
##               and MVR forms, which is not a key here - confirm whether that
##               is intentional.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
252
## \@opunused, \@opinvalid, \@opinvlstyle
## Names how much of an invalid/unused encoding gets decoded; the list values
## are empty placeholders.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
261
## Known CPU names.  The tuple values are empty placeholders.
## NOTE(review): presumably the valid values for a minimum CPU tag
##               (see Instruction.sMinCpu) - confirm against the parser.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
269
## \@opcpuid
##
## Maps the lower case CPUID feature name to the name of the corresponding
## X86_CPUID_XXX feature bit constant.
##
## Note! 'pclmul', 'monitor' and 'smx' used to map to the constant of the
##       neighbouring CPUID.01h:ECX bit (DTES64, CPLDS and TM2 respectively).
##       They now name their own feature bits.  The constant names are
##       presumably the ones defined by iprt/x86.h - verify against the header.
g_kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':   'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':  'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
313
## \@ophints values.
##
## Maps the lower case hint name to the name of the matching DISOPTYPE_XXX
## constant.  'ignores_op_size' maps to an empty string instead of a constant.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
};
347
348
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hexadecimal digits (e.g. '0x0f').
    Returns True if it is, False if not.
    """
    # Reject anything that doesn't have the right length and prefix up front.
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    return all(chDigit in '0123456789abcdef' for chDigit in sOpcode[2:]);
360
361
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys are used here.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 1
        'vex2':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 2
        'vex3':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 3
        'xop8':     [], ##< XOP prefix with map select (mmmmm) = 8
        'xop9':     [], ##< XOP prefix with map select (mmmmm) = 9
        'xop10':    [], ##< XOP prefix with map select (mmmmm) = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        sName is the map name, asLeadOpcodes the optional list of lead opcode
        bytes ('0xNN' strings), sSelector a kdSelectors key, sEncoding a
        kdEncodings key and sDisParse an optional 'IDX_Parse...' name.
        Invalid arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries. This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For all but the 'byte' selector the opcode
        byte is treated as a ModR/M byte (mod = bits 7:6, reg = bits 5:3,
        rm = bits 2:0).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        # Note! The mod field occupies the two top bits, so the value to
        #       compare the masked byte with is 0xc0 (not 0xc, which the
        #       masked value can never equal).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions (those without an opcode are skipped).
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, convert it to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words
        of the map name: 'm', 'r' and two digit lower case hex words are
        appended as '_<word>', all other words are appended with the first
        letter in upper case (after capitalizing 'grp' and 'map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if not sWord:           # leading, trailing or double underscore
                continue;
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
498
499
class TestType(object):
    """
    Test value type.

    The base class handles integer like values.  fUnsigned gives the default
    choice between zero extending (True) and sign extending (False) a value.
    A value with an explicit '+' or '-' sign is always sign extended,
    regardless of fUnsigned.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes.
        self.acbSizes  = acbSizes;

    class BadValue(Exception):
        """ Exception raised for values that cannot be converted. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its complement.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue (decimal or 0x-prefixed hex, optionally signed) to
        the shortest little endian byte representation with a size listed
        in self.acbSizes.  Values too wide for the largest size are rounded
        up to a multiple of that size.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class only produces the first form.

        Raises BadValue if invalid value.

        NOTE(review): A non-negative value is never widened to keep its top
                      bit clear, so '+0x80' yields the single byte 0x80
                      flagged for sign extension.  Confirm this is intended.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts the '0x' prefix by one.
        fSigned     = sValue[0] in ('-', '+');
        fSignExtend = fSigned or not self.fUnsigned;
        offPrefix   = 1 if fSigned else 0;
        fHex        = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Convert to a (long) integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  Negative values are converted to two's
        # complement by inverting the digits of (-n - 1), prepending an 'f'
        # when the top bit would otherwise be clear.
        if iValue >= 0:
            sHex     = '%x' % (iValue,);
            chFiller = '0';
        else:
            sHex     = ''.join([self.kdHexInv[chDigit] for chDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] in '01234567':
                sHex = 'f' + sHex;
            chFiller = 'f';

        # Figure out the natural width: the first size that fits, or else
        # the next multiple of the largest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad it up to the natural width.
        sHex = chFiller * (cNaturalDigits - cDigits) + sHex;

        # The digits are most significant first, the result is little endian.
        abValue = bytearray.fromhex(sHex)[::-1];

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integers.
        """
        _ = sValue;
        return False;
624
625
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that denote a flag being clear (the '!' entries in
    ## g_kdEFlagsMnemonics).  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the TestType.get() result for the mask of flags to set when
        no flag is to be cleared.  Otherwise returns a pair where the first
        entry is the mask of flags to clear and the second one the mask of
        flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains at least one mnemonic for a clear flag and
        thus results in a pair from get().

        Note! Whole flag names are compared.  A substring search would for
              instance find 'pl' inside 'iopl' and report a pair for a value
              that get() turns into a single set-mask.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
661
662
663
class TestInOut(object):
    """
    One input or output state modifier.

    Think of this as a value that modifies BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created with the default fUnsigned=True and so
    ##               behaves exactly like 'uint' - confirm this is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    ## Maps the field name to ( default type, tbd, ).  Everything defaults to
    ## 'uint' except for 'efl', which is adjusted right after the table.
    kdFields = dict.fromkeys(
        [
            # Operands (\@op1 thru \@op4).
            'op1', 'op2', 'op3', 'op4',
            # Flags.
            'efl', 'efl_undef',
            # 8-bit GPRs.
            'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh',
            'r8l', 'r9l', 'r10l', 'r11l', 'r12l', 'r13l', 'r14l', 'r15l',
            # 16-bit GPRs.
            'ax', 'dx', 'cx', 'bx', 'sp', 'bp', 'si', 'di',
            'r8w', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w',
            # 32-bit GPRs.
            'eax', 'edx', 'ecx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
            'r8d', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d',
            # 64-bit GPRs.
            'rax', 'rdx', 'rcx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
            'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
            # 16-bit, 32-bit or 64-bit registers according to operand size.
            'oz.rax', 'oz.rdx', 'oz.rcx', 'oz.rbx', 'oz.rsp', 'oz.rbp', 'oz.rsi', 'oz.rdi',
            'oz.r8', 'oz.r9', 'oz.r10', 'oz.r11', 'oz.r12', 'oz.r13', 'oz.r14', 'oz.r15',
        ],
        ( 'uint', '', ));
    kdFields['efl'] = ( 'efl', '', );

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators.  All four
        arguments must be strings.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        for sArg in (sField, sOp, sType, sValue):
            assert isinstance(sArg, str);
794
795
class TestSelector(object):
    """
    A single selector of an instruction test, i.e. a comparison of a
    selector variable with one of its valid values.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    ## Each variable maps to a dictionary of valid value -> identifier name.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
    };
    ## Selector shorthand predicates.
    ## Each of these translates into a '<variable>==<value>' expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue a valid value for the variable.  Asserts if not.
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
872
873
874class InstructionTest(object):
875 """
876 Instruction test.
877 """
878
879 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
880 self.oInstr = oInstr; # type: InstructionTest
881 self.aoInputs = []; # type: list(TestInOut)
882 self.aoOutputs = []; # type: list(TestInOut)
883 self.aoSelectors = []; # type: list(TestSelector)
884
885 def toString(self, fRepr = False):
886 """
887 Converts it to string representation.
888 """
889 asWords = [];
890 if self.aoSelectors:
891 for oSelector in self.aoSelectors:
892 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
893 asWords.append('/');
894
895 for oModifier in self.aoInputs:
896 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
897
898 asWords.append('->');
899
900 for oModifier in self.aoOutputs:
901 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
902
903 if fRepr:
904 return '<' + ' '.join(asWords) + '>';
905 return ' '.join(asWords);
906
907 def __str__(self):
908 """ Provide string represenation. """
909 return self.toString(False);
910
911 def __repr__(self):
912 """ Provide unambigious string representation. """
913 return self.toString(True);
914
class Operand(object):
    """
    An operand of an instruction: where it is located and what type it has.
    """

    def __init__(self, sWhere, sType):
        # Both values must come from the global tables.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType;    ##< g_kdOpLocations and g_kdOpTypes keys respectively.

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with E, G or M. """
        chKind = self.sType[0];
        return chKind == 'E' or chKind == 'G' or chKind == 'M';
929
930
931
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain data holder for everything the parser learns about one instruction,
    plus a few helpers for turning the opcode and EFLAGS specifications into
    integer values.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile is the name of the file the instruction was found in and
        iLine the line at which the record was created.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic = None;
        self.sBrief = None;
        self.asDescSections = []; # type: list(str)
        self.aoMaps = []; # type: list(InstructionMap)
        self.aoOperands = []; # type: list(Operand)
        self.sPrefix = None; ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode = None; # type: str
        self.sEncoding = None;
        self.asFlTest = None;
        self.asFlModify = None;
        self.asFlUndefined = None;
        self.asFlSet = None;
        self.asFlClear = None;
        self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum = None; ##< OP_XXXX value. Default is based on the uppercased mnemonic.
        self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction.
        self.aoTests = []; # type: list(InstructionTest)
        self.sMinCpu = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr = None; ##< Some CPU restriction expression...
        self.sGroup = None;
        self.fUnused = False; ##< Unused instruction.
        self.fInvalid = False; ##< Invalid instruction (like UD2).
        self.sInvalidStyle = None; ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats = None;
        self.sFunction = None;
        self.fStub = False;
        self.fUdStub = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile = sSrcFile;
        self.iLineCreated = iLine;
        self.iLineCompleted = None;
        self.cOpTags = 0;
        self.iLineFnIemOpMacro = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs for all the
        fields that are set.  When fRepr is True the list is wrapped in '<>'.
        """
        aasFields = [];

        aasFields.append(['opcode', self.sOpcode]);
        aasFields.append(['mnemonic', self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps: aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding', self.sEncoding]);
        if self.dHints: aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum', self.sDisEnum]);
        if self.asCpuIds: aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group', self.sGroup]);
        if self.fUnused: aasFields.append(['unused', 'True']);
        if self.fInvalid: aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest', self.asFlTest]);
        aasFields.append(['flmodify', self.asFlModify]);
        aasFields.append(['flundef', self.asFlUndefined]);
        aasFields.append(['flset', self.asFlSet]);
        aasFields.append(['flclear', self.asFlClear]);
        aasFields.append(['mincpu', self.sMinCpu]);
        aasFields.append(['stats', self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub: aasFields.append(['fStub', 'True']);
        if self.fUdStub: aasFields.append(['fUdStub', 'True']);
        if self.cOpTags: aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Unset (None) fields are left out; identity test rather than '!=' so
        # values with custom comparison operators cannot confuse the filter.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            sRet = '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        A '0x??' value is returned as is, a '/r' value (single digit) is
        returned shifted into the ModR/M reg field position (bits 3 thru 5).

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  The length is checked first so that short or empty
        # specs end up in the 'unsupported' exception below rather than
        # raising an IndexError.
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics and
        then to its value via g_kdX86EFlagsConstants.  None and empty lists
        yield zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1094
1095
## All the instructions.
## SimpleParser.addInstruction appends every Instruction it creates here.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Filled by SimpleParser.doneInstructionOne, which reports duplicates as errors.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Filled by SimpleParser.doneInstructionOne; several instructions may share a function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Keyed by map name; these are the names accepted by the @opmaps tag and as
## the default map of a SimpleParser.
## NOTE(review): 'grp1_80' is the only grp1_* map given without sSelector = '/r';
## confirm against the InstructionMap defaults whether that is intended.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80', asLeadOpcodes = ['0x80',]),
    'grp1_81':      InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    # Maps reached via the 0x0f escape byte.
    'two0f':        InstructionMap('two0f', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10', asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # Three byte opcode maps.
    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX encoded maps.
    'vexmap1':      InstructionMap('vexmap1', sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2', sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3', sEncoding = 'vex3'),

    # XOP encoded maps.
    'xopmap8':      InstructionMap('xopmap8', sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9', sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10', sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1167
1168
1169
class ParserException(Exception):
    """ Exception type raised by SimpleParser.raiseError and SimpleParser.raiseCommentError. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1174
1175
1176class SimpleParser(object):
1177 """
1178 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1179 """
1180
## @name Parser state.
## Values for SimpleParser.iState.
## @{
kiCode = 0;             ##< Initial state, set by the constructor.
kiCommentMulti = 1;     ##< Presumably inside a multi-line comment; the code using it is not part of this section.
## @}
1186
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines of the
    file, and sDefaultMap the name of the map (a g_dInstructionMaps key)
    that instructions without an explicit map assignment end up in.
    """
    self.sSrcFile = sSrcFile;
    self.asLines = asLines;
    self.iLine = 0;                 # Current line number, used by error() and raiseError().
    self.iState = self.kiCode;
    self.sComment = '';             # Reset by doneInstructions().
    self.iCommentLine = 0;          # Base line number for errorComment() and raiseCommentError().
    self.aoCurInstrs = [];          # The instructions currently being worked on.

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap = g_dInstructionMaps[sDefaultMap];

    # Statistics, updated by doneInstructions() and ensureInstructionForOpTag().
    self.cTotalInstr = 0;
    self.cTotalStubs = 0;
    self.cTotalTagged = 0;

    # Validation patterns.  oReMnemonic and oReDisEnum are used by the
    # @opmnemonic and @opdisenum handlers; the users of the others are
    # outside this section.
    self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug = True;             # When set, debug() prints its messages.

    # Maps each comment tag to the method parsing it.  The handlers visible
    # here share the signature (sTag, aasSections, iTagLine, iEndLine) and
    # return True on success and False on failure.
    self.dTagHandlers = {
        '@opbrief': self.parseTagOpBrief,
        '@opdesc': self.parseTagOpDesc,
        '@opmnemonic': self.parseTagOpMnemonic,
        '@op1': self.parseTagOpOperandN,
        '@op2': self.parseTagOpOperandN,
        '@op3': self.parseTagOpOperandN,
        '@op4': self.parseTagOpOperandN,
        '@oppfx': self.parseTagOpPfx,
        '@opmaps': self.parseTagOpMaps,
        '@opcode': self.parseTagOpcode,
        '@openc': self.parseTagOpEnc,
        '@opfltest': self.parseTagOpEFlags,
        '@opflmodify': self.parseTagOpEFlags,
        '@opflundef': self.parseTagOpEFlags,
        '@opflset': self.parseTagOpEFlags,
        '@opflclear': self.parseTagOpEFlags,
        '@ophints': self.parseTagOpHints,
        '@opdisenum': self.parseTagOpDisEnum,
        '@opmincpu': self.parseTagOpMinCpu,
        '@opcpuid': self.parseTagOpCpuId,
        '@opgroup': self.parseTagOpGroup,
        '@opunused': self.parseTagOpUnusedInvalid,
        '@opinvalid': self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest': self.parseTagOpTest,
        '@optestign': self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponlytest': self.parseTagOpOnlyTest,
        '@opstats': self.parseTagOpStats,
        '@opfunction': self.parseTagOpFunction,
        '@opdone': self.parseTagOpDone,
    };

    # Messages collected by error() and errorComment(), see printErrors().
    self.asErrors = [];
1247
def raiseError(self, sMessage):
    """
    Raises a ParserException carrying sMessage prefixed by the source file
    name and the current line number.
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
1253
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is calculated
    relative to the start of the current comment (self.iCommentLine).
    """
    iLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iLine, sMessage,);
    raise ParserException(sFullMsg);
1259
def error(self, sMessage):
    """
    Records an error message for the current line in self.asErrors.
    Always returns False so callers can 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1267
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error message for a line inside the current comment, the
    line number being self.iCommentLine + iLineInComment.
    Always returns False so callers can 'return self.errorComment(...)'.
    """
    iLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1275
def printErrors(self):
    """
    Writes all the collected error messages to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1284
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1291
1292
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the current instruction
    list and the global list of all instructions.

    iLine is the line to record as the creation line, defaulting to the
    current line.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    self.aoCurInstrs.append(oNewInstr);
    g_aoAllInstructions.append(oNewInstr);
    return oNewInstr;
1301
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Fills in the mnemonic, and the operands if none are set yet, from an IEM
    stats base name like string ('mnemonic_type1_type2').

    Does nothing if the mnemonic is already set.  Returns False if one of
    the words following the mnemonic isn't a known operand type, operands
    preceding it having been added by then.  Otherwise True.
    """
    # Nothing to do when we've already got a mnemonic.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Existing operands take precedence over anything derived from the name.
    if oInstr.aoOperands:
        return True;

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            # Deliberately not reported as an error, just signalled to the caller.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1317
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Fills in defaults for the mnemonic, disassembler enum, stats name,
    function name and encoding where missing, adds the instruction to its
    maps (the default map if none were given) and to the global stats and
    function dictionaries.  iLine is recorded as the completion line.

    Returns True.  A duplicate stats name is reported via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    # (Placeholder, nothing is done here yet.)
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    # (Placeholder, nothing is done here yet.)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name sans 'iemOp_' serves as fallback for the stats name.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name if there is
    # one, otherwise from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types, falling
    # back on the stats name.  This must come after the stats derivation
    # above, as that one prefers an explicitly given function name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.  Note that it is left unset when there
    # are operands and the first one doesn't use ModR/M.
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            oInstr.sEncoding = 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
                oInstr.sEncoding = 'ModR/M+VEX';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction using a name stays in the dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1417
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions, updates the statistics and
    resets the current comment and instruction list.

    The completion line is the current line, or when iLineInComment is
    given, that line relative to the start of the current comment.
    Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oCur in self.aoCurInstrs:
        self.doneInstructionOne(oCur, iLineDone);
        if oCur.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh.
    self.aoCurInstrs = [];
    self.sComment = '';
    return True;
1432
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib to oValue for all the current instructions.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced; any other value, empty strings included, is left alone.
    """
    for oCur in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCur, sAttrib) is None:
            setattr(oCur, sAttrib, oValue);
1444
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib to oValue for all the
    current instructions, padding the list with None entries as needed.

    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oCur in self.aoCurInstrs:
        aoValues = getattr(oCur, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
1456
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex byte, the words following 'Opcode' are stored space separated as
    sRawOldOpcodes for the current instructions that doesn't have a value
    yet.  An uppercase '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and (   asWords[1].startswith('0x')
            or asWords[1].startswith('0X')):
        # Drop the 'Opcode' word and keep the opcode bytes and anything
        # following them (like '/4').
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1472
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed,
    creating one at the tag line if necessary, and counts the tag.

    The tag count of every current instruction is incremented, and so is
    self.cTotalTagged for each instruction getting its first tag.
    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCur in self.aoCurInstrs:
        fFirstTag = oCur.cOpTags == 0;
        oCur.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1482
@staticmethod
def flattenSections(aasSections):
    """
    Turns each non-empty section (list of lines) into a single string made
    up of the stripped lines joined by spaces.
    Returns list of strings, one string per section.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
1494
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The lines of each section are stripped and joined using sLineSep, and
    the sections are then joined using sSectionSep (newline by default).
    Empty sections are skipped, so the result neither starts nor ends with
    a section separator.
    """
    # Joining only the non-empty sections prevents a stray leading
    # separator when the first section is empty.
    asSections = [sLineSep.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
    return sSectionSep.join(asSections);
1512
1513
1514
1515 ## @name Tag parsers
1516 ## @{
1517
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating dot is added if missing.  Returns True on success, False
    (after reporting the problem) on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a dot that is followed by a space, i.e. the end of a sentence
    # that isn't the last one.  Dots inside words (like '0.5') are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1547
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each section is flattened into a single
    string and appended to the description sections of the instruction.
    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        for sSection in self.flattenSections(aasSections):
            oInstr.asDescSections.append(sSection);
    return True;
1562
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success, False (after reporting the problem) if the
    value is invalid or the instruction already has a mnemonic.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value must match the identifier pattern.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
1585
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See g_kdOpLocations for a list.  When
    omitted, the location is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See g_kdOpTypes for a list.

    Returns True on success, False (after reporting the problem) if the
    value is malformed or unknown, or if the operand is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;      # The last character of the tag is the operand number.
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The list
    # is padded with None entries so operands can be given in any order.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1635
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Whitespace around the map names is ignored and so are empty entries.
    Returns True on success, False (after reporting the problem) if no
    map is given, a map name is unknown, or the instruction is already in
    one of the maps.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Empty entries are dropped
    # since str.split(',') yields [''] rather than [] for an empty value,
    # which would otherwise bypass the 'value required' check.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed twice in the value; report it and carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1670
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    A two character value is taken to be a hex byte lacking the '0x'.
    Returns True on success, False (after reporting the problem) if the
    value is missing or invalid, or if the prefix is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # Same polarity as the @opcode check: complain when it is NOT a valid byte.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1706
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value is accepted if it is one of the g_kdSpecialOpcodes or passes
    _isValidOpcodeByte.  Returns True on success, False (after reporting
    the problem) if the value is invalid or the opcode is already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate the flattened value.
    sOpcode = self.flattenAllSections(aasSections);
    if sOpcode not in g_kdSpecialOpcodes and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Refuse to replace an existing value.
    sOldOpcode = oInstr.sOpcode;
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sOpcode,));

    oInstr.sOpcode = sOpcode;
    return True;
1730
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is found in g_kdEncodings or
    g_dInstructionMaps, or passes _isValidOpcodeByte.  Returns True on
    success, False (after reporting the problem) if the value is invalid
    or the encoding is already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate the flattened value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown = sEncoding in g_kdEncodings \
          or sEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
1757
## EFlags tag to Instruction attribute name.
## Used by parseTagOpEFlags to find the Instruction attribute (a list of flag
## names, initially None) that the flags given with a tag are stored in.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
1766
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics names, or
    the single word 'none' for an empty list.  The list is stored in the
    attribute that kdOpFlagToAttr associates with the tag.

    Returns True on success, False (after reporting the problems) if any
    of the flags are unknown or the attribute is already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value, 'none' meaning no flags.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];

    # Validate the flags, stripping whitespace where that makes them valid.
    # All invalid flags are reported before giving up.
    fOkay = True;
    for iFlag, sFlag in enumerate(asFlags):
        if sFlag in g_kdEFlagsMnemonics:
            continue;
        sStripped = sFlag.strip();
        if sStripped in g_kdEFlagsMnemonics:
            asFlags[iFlag] = sStripped;
        else:
            fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
    if not fOkay:
        return False;

    # Store them, refusing to replace an existing list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
1798
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a g_kdHints name; the single word 'none' means no
    hints.  The hints are added to the dHints dictionary of the
    instruction, duplicates being reported but otherwise ignored.

    Returns True on success, False (after reporting the problems) if any
    of the hints are unknown.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry (strings are immutable, so it
                    # is the list that must be indexed here).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1832
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.  The value must match self.oReDisEnum and may only be
    set once per instruction.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # We want exactly one word; complain otherwise, but soldier on with the
    # first one when there are several.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    sCandidate = asWords[0];
    if self.oReDisEnum.match(sCandidate) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sCandidate, self.oReDisEnum.pattern));

    # Refuse to replace a value that has already been set.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sCandidate,));

    oInstr.sDisEnum = sCandidate;
    return True;
1861
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmincpu
    Value:  simple CPU name

    Indicates when this instruction was introduced.

    Returns the result of errorComment when the value is missing or is not
    a key of g_kdCpuNames.  Otherwise returns True; an attempt to replace
    an already set, different CPU name is reported and the existing value
    is kept.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # An empty value used to raise IndexError on asCpus[0] below.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The value must not be assigned before this check, or the
    # overwrite detection can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
1891
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcpuid
    Value:  none | CPUID flag specifier

    CPUID feature bit which is required for the instruction to be present.

    Valid values are appended to oInstr.asCpuIds; duplicates are reported
    but do not fail the tag.  Returns False when any value is invalid
    (nothing is appended in that case), True otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Must update the list entry; strings are immutable so the
                    # previous 'sCpuId[iCpuId] = ...' could only raise TypeError.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1925
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is accepted, and only once per instruction.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value must consist of a single word.
    asGroups = self.flattenAllSections(aasSections).split();
    try:
        (sName,) = asGroups;
    except ValueError:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    if self.oReGroupName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sName, self.oReGroupName.pattern));

    # No overwriting of an already assigned group.
    if oInstr.sGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sName,));

    oInstr.sGroup = sName;
    return True;
1951
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  invalid opcode behaviour style

    \@opunused marks the specification as being for a currently unused
    instruction encoding.

    \@opinvalid marks the specification as being for a currently invalid
    instruction encoding (like UD2).

    \@opinvlstyle only states how CPUs decode the instruction when it is
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Only one of the three tags may be given per instruction.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value is a single word naming one of the known styles.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sNewStyle = asStyles[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # The style can only be set once, whichever of the tags is used.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sNewStyle,));
    oInstr.sInvlStyle = sNewStyle;

    # Two of the tags additionally raise a flag on the instruction.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);
    return True;
1989
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [selectors /] inputs -> outputs
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is one test.  A test that parses cleanly is
    appended to oInstr.aoTests, the others are reported thru errorComment.
    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->') and finally selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list must be identified
            # by identity; comparing by value treats two still empty lists as
            # the same one and misses repeated or misplaced separators.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            # The first compare operator found in the expression decides the split.
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                # The first operator found in the item decides the split.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # The type is optional, the field supplies the default.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                        oItem = TestInOut(sField, sOp, sValue, sType);
                                    else:
                                        self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                     % ( sTag, sDesc, sItem, ));
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                                 % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                             % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                         % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Only keep tests that parsed without trouble.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2129
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@optestign | \@optestignore
    Value:  anything, it is ignored

    Simple trick for ignoring one test while debugging another: the
    handler accepts the tag and does nothing with it.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2141
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      one or more opstat or function names
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.  The references are only recorded in oInstr.asCopyTests
    here.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and queue the copy jobs on the instruction.  They are
    # executed after all the input has been parsed, so that forward references
    # can be handled.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2170
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oponlytest
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if len(sUnexpected) > 0:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction in the global only-test list, once.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
2193
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opfunction
    Value:  VMM function name

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or it is
    generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation doesn't follow immediately or that aren't implemented
    yet.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name and must match the pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern));

    # An already assigned name is never replaced.
    sCurrent = oInstr.sFunction;
    if sCurrent is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sCurrent, sName,));

    oInstr.sFunction = sName;
    return True;
2221
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opstats
    Value:  VMM statistics base name

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation doesn't follow immediately or that aren't implemented
    yet.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name and must match the pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # An already assigned name is never replaced.
    sCurrent = oInstr.sStats;
    if sCurrent is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sCurrent, sName,));

    oInstr.sStats = sName;
    return True;
2249
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdone
    Value:  none

    Explicitly flushes the instructions that have been specified so far by
    calling doneInstructions, whose result is returned.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2262
2263 ## @}
2264
2265
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore skipped, True if it was processed.  Tag values are handed to
    the handlers in self.dTagHandlers; problems go thru errorComment.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Processes one completed tag.  Returns a (fOpTag, sFlatDefault) pair:
        fOpTag is True if a tag handler was invoked, sFlatDefault is the
        flattened untagged text for the '@default' pseudo tag and None for
        everything else.
        """
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (True, None);
        if sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (False, self.flattenAllSections(aasTagSections));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        return (False, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Value text: non-empty lines extend the current section, an
            # empty line after some text starts a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            fOpTag, sFlat = processTag(sCurTag, aasSections, iCurTagLine, iLine);
            if fOpTag:
                cOpTags += 1;
            if sFlat is not None:
                sFlatDefault = sFlat;

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.  This goes thru the same code as all the other
    # tags so that a misspelled last tag gets the same "Did you mean"
    # diagnostics instead of being silently ignored.
    #
    fOpTag, sFlat = processTag(sCurTag, aasSections, iCurTagLine, iLine);
    if fOpTag:
        cOpTags += 1;
    if sFlat is not None:
        sFlatDefault = sFlat;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2362
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    On success a tuple is returned: the first element is the offset just
    past the closing parenthesis of the invocation, the second is the list
    of macro arguments with the macro name as element zero.  An invalid
    macro name or running out of source lines gives the result of
    self.error instead.
    """
    # The macro name is whatever precedes the first opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Walk the argument list character by character, tracking the
    # parenthesis nesting and pulling in more source lines when needed.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;
        chCur = sInvocation[offCur];

        if chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')'):
            # Only separators at the outermost level delimit arguments.
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asArgs);
2404
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation when the next non-blank character is an opening
    parenthesis.  In the found case the returned offset is relative to the
    start of sCode.
    """
    offHit = sCode.find(sMacro);
    # Use startswith rather than indexing: the macro name may be the very
    # last thing in sCode, in which case '[0]' would raise IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2414
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Returns None if not found, arguments as per parseMacroInvocation if found.

    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs = tResult[1];
    return asArgs;
2420
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Returns same as findAndParseMacroInvocation.

    The macro names in asMacro are tried in order and the first one
    yielding a result wins; the remaining names are not tried.
    """
    # The generator is lazy, so the search stops at the first hit.
    oResults = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    return next((asHit for asHit in oResults if asHit is not None), None);
2430
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty).
    sMacro is only used in error messages and sAsm is currently unused.
    Problems are reported thru self.error.  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        sFormEncoding = g_kdIemForms[sForm][0];
        asFormWhere   = g_kdIemForms[sForm][1];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = sFormEncoding;
        elif sFormEncoding != oInstr.sEncoding:
            # Report the encoding of the form, not the whole form table entry.
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, sFormEncoding, sForm));

        # Check the parameter locations for the encoding.
        if asFormWhere is not None:
            if len(asFormWhere) > len(oInstr.aoOperands):
                # Indexing aoOperands below would raise IndexError, so report
                # the operand count mismatch instead.
                self.error('%s: a_Form=%s expects %u operand(s), current instruction has %u'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands),));
            else:
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint parameters are '|' separated lists of prefixed constants or '0'.
    for sAllHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                        (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sAllHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
2544
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros have no a_Stats and a_szMnemonic parameters, so both are
    constructed from the lower case mnemonic and the operands before
    passing everything on to workerIemOpMnemonicEx.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = '%s_%s' % (sLower, '_'.join(asOperands),);
        sAsm   = '%s %s' % (sLower, ','.join(asOperands),);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2554
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if one of the FNIEMOP_XXX function definition macros was
    found and processed, False otherwise (including when only
    IEMOP_MNEMONIC* macros were processed).
    """
    # Nothing to scan for unless there is an opening parenthesis somewhere
    # past the first character.
    if sCode.find('(') <= 0:
        return False;

    # Look for instruction decoder function definitions.  ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];
        sDefMacro = asArgs[0];
        fIsStub   = sDefMacro.find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sDefMacro, oInstr,) );
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sDefMacro.find('UD_STUB') > 0, fOverwrite = True);
        # A stub has no body, so the instruction is complete at this point.
        if fIsStub:
            self.doneInstructions();
        return True;

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,]
    #                     a_fDisHints, a_fIemHints) for N = 0 thru 4.  The N
    # operands sit between a_Lower and the two hint parameters.
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[6 + cOperands], asArgs[7 + cOperands], asArgs[6 : 6 + cOperands]);

    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # for N = 0 thru 4, same layout without the two leading parameters.
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[4 + cOperands], asArgs[5 + cOperands], asArgs[4 : 4 + cOperands]);

    return False;
2648
2649
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine, switching between the code
    state (self.kiCode) and the multi-line comment state
    (self.kiCommentMulti).  Code fragments are handed to checkCodeForMacro(),
    completed comment text is collected in self.sComment and handed to
    parseComment(), and a '}' in the first column of a code line completes
    the current instructions via doneInstructions().

    Returns number of errors (the value of self.printErrors()).
    Exceptions raised by the helper methods are not caught here.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        # Code up to the comment start, then switch to comment state.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        # The rest of the line is code.
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # Comment ends on this line; process it and resume code scanning.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        # The rest of the line belongs to the comment.
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # startswith() is used rather than sLine[0] so that an empty string
        # entry in asLines cannot raise IndexError.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2713
2714
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the path of the file to read; sDefaultMap is passed on to
    SimpleParser together with the file's lines.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed and then re-raised; any other exception from
    the parser propagates unchanged.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # Closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        cErrors = SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;

    return cErrors;
2745
2746
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    For every instruction in g_aoAllInstructions with a non-empty asCopyTests
    list, each reference is looked up first in g_dAllInstructionsByStat and,
    failing that, in g_dAllInstructionsByFunction.  The aoTests of every
    matching source instruction are appended to the destination's aoTests.

    A reference that cannot be resolved, or that resolves to the destination
    instruction itself, is reported as an error on sys.stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;

        for sReference in oTarget.asCopyTests:
            # A statistics name match wins; otherwise try the function name table.
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            aoSources = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sReference, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
2774
2775
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on
    every instruction in g_aoAllInstructions that has tests but is not in
    the list.  When it is empty, nothing is changed.

    Always returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
2787
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are looked up in the directory containing this script.  Once
    they are parsed, the test copying and the only-test filtering passes are
    run and their error counts added to the total.

    Returns True when the total error count is zero.  Otherwise the process
    is terminated via sys.exit(1), i.e. SystemExit is raised.
    """
    # (default map name, source file name) pairs, processed in this order.
    atSources = [
        ( 'one',    'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f',  'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    cErrors = 0;
    for sDefaultMap, sName in atSources:
        sPath = os.path.join(sScriptDir, sName);
        cErrors += __parseFileByName(sPath, sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
2808
2809
2810
# Executed unconditionally when this module is loaded, whether it is run as a
# script or imported; __parseAll() calls sys.exit(1) if any errors are counted.
__parseAll();
2812
2813
2814#
2815# Generators (may perhaps move later).
2816#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The maps in g_dInstructionMaps are ordered by encoding followed by the
    concatenated lead opcodes.  For a map, a 'const DISOPCODE <table>[]'
    definition is produced with one OP()/OPVEX() line per instruction,
    followed by an AssertCompile on the table size, and written to oDstFile.

    Only the first map in that order is emitted for now (the map loop is left
    after the first iteration).
    """

    def _mapOrderKey(tNameAndMap):
        """ Sort key: the map encoding followed by its lead opcodes. """
        oCurMap = tNameAndMap[1];
        return oCurMap.sEncoding + ''.join(oCurMap.asLeadOpcodes);

    for sName, oMap in sorted(g_dInstructionMaps.items(), key = _mapOrderKey):
        assert oMap.sName == sName;

        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        # Offsets at which the successive columns of a table line start.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Every sixteenth entry starts a new group with a marker comment.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Empty slots produce no output.
            if oInstr is None:
                continue;

            # Slots holding a list of instructions only get a placeholder.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            aoOperands = oInstr.aoOperands;
            if len(aoOperands) > 3:
                sMacro       = 'OPVEX'
                cMaxOperands = 4;
            else:
                sMacro       = 'OP';
                cMaxOperands = 3;
            assert len(aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            asOperandFmts = [];
            for oOperand in aoOperands:
                sOperandFmt = g_kdOpTypes[oOperand.sType][2];
                if sOperandFmt[0] != '%':               ## @todo remove upper() later.
                    sOperandFmt = sOperandFmt.upper();  ## @todo remove upper() later.
                asOperandFmts.append(sOperandFmt);

            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asOperandFmts:
                sFormat += ' ' + ','.join(asOperandFmts);
            sFormat += '",';
            asColumns = [ sFormat, ];

            #
            # Decoders.
            #
            iStart    = len(asColumns);
            sEncoding = oInstr.sEncoding;
            if sEncoding is None:
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(aoOperands) > 1 and aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif sEncoding == 'prefix':
                # One zero entry per operand.
                asColumns.extend(['0,'] * len(aoOperands));
            elif sEncoding == 'fixed':
                pass;
            elif sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo
                #IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
                #IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
                #IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                #IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
                #IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands.
            cHandled = len(asColumns) - iStart;
            for oOperand in aoOperands[cHandled:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            # Pad the decoder columns up to the operand maximum.
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
            asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
            asColumns.extend(asParams);

            #
            # Flags.  Only hints mapping to DISOPTYPE_ defines are included.
            #
            asFlags = [];
            for sHint in sorted(oInstr.dHints.keys()):
                sDefine = g_kdHints[sHint];
                if sDefine.startswith('DISOPTYPE_'):
                    asFlags.append(sDefine);
            asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

            #
            # Format the columns into a line.  Each column is padded out to
            # its offset, with at least one blank separating it from the
            # preceding text.
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
                sLine += sColumn;

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #    DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #  { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
2980
# When run as a script, write the disassembler tables to the function's
# default destination (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
2983
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette