VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66306

Last change on this file since 66306 was 66306, checked in by vboxsync, 8 years ago

IEM: Implemented movss Wss,Vss (f3 0f 11).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 125.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66306 2017-03-28 14:49:17Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66306 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names and their values.
## Single flag bits are spelled as shifts by their EFLAGS bit position.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant.  A leading '!'
## on the constant name marks a mnemonic standing for the flag being clear.
##
## NOTE(review): Debuggers conventionally use 'pe' for PF=1 and 'po' for PF=0,
##               which looks like the opposite of the 'po'/'pe' entries below.
##               Confirm before relying on them.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## \@op[1-4] locations
## Every location gets its own (initially empty) list value.
g_kdOpLocations = dict((sLocation, []) for sLocation in (
    'reg',      ## modrm.reg
    'rm',       ## modrm.rm
    'imm',      ## immediate instruction data
    'vvvv',     ## VEX.vvvv

    # fixed registers.
    'AL',
    'rAX',
    'rSI',
    'rDI',
    'rFLAGS',
    'CS',
    'DS',
    'ES',
    'FS',
    'GS',
    'SS',
));
145
## \@op[1-4] types
##
## Maps an operand type name to a four entry tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':   ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':   ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':   ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':   ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':  ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':  ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':  ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':  ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),

    # ModR/M.rm - memory only.
    'Ma':   ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.

    # ModR/M.reg
    'Gb':   ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':   ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':   ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':  ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':  ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':  ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':  ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),

    # Immediate values.
    'Ib':   ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':   ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':   ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':   ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':   ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':   ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':   ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':   ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':   ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':   ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':   ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':   ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':   ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':   ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register.
    'Fv':   ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':   ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':  ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':   ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':   ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':   ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':   ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':   ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':   ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
215
216# IDX_ParseFixedReg
217# IDX_ParseVexDest
218
219
## IEMFORM_XXX mappings.
##
## Maps the form name to a tuple holding the encoding name and the list of
## operand locations (g_kdOpLocations keys) in operand order.  The location
## list is None for the 'FIXED' form.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':        ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':    ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M',     [ 'rm', ], ),
    'R':        ( 'ModR/M',     [ 'reg', ], ),
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ),
    'FIXED':    ( 'fixed',      None, )
};
236
## \@oppfx values.
## Every prefix gets its own (initially empty) list value.
g_kdPrefixes = dict((sPrefix, []) for sPrefix in ('none', '0x66', '0xf3', '0xf2',));
244
## Special \@opcode tag values.
## Every value gets its own (initially empty) list.
g_kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
254
## Valid values for \@openc
##
## Each value is a single entry list holding a BS3CG1ENC_XXX name, or None.
##
## NOTE(review): g_kdIemForms also uses the encoding name 'ModR/M+VEX', which
##               is not listed here - confirm whether that is intentional.
g_kdEncodings = {
    'ModR/M':   [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'fixed':    [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':   [ None, ],                  ##< Prefix
};
261
## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
270
## Known CPU names, each mapped to an empty tuple.
g_kdCpuNames = dict((sCpuName, ()) for sCpuName in ('8086', '80186', '80286', '80386', '80486',));
278
## \@opcpuid
##
## Maps the feature name used in the \@opcpuid tag to the name of the
## corresponding X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the constant of a neighbouring ECX bit
    # (DTES64, CPLDS and TM2) rather than the feature itself.
    'pclmul':   'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':  'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
322
## \@ophints values.
##
## Maps the hint name to a DISOPTYPE_XXX name, or to an empty string for the
## last two hints.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
357
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every exception type gets its own (initially empty) list value.
g_kdXcptTypes = dict((sXcptType, []) for sXcptType in (
    'none',
    '1', '2', '3', '4', '4UA', '5', '6', '7', '8', '11', '12',
    'E1', 'E1NF',
    'E2',
    'E3', 'E3NF',
    'E4', 'E4NF',
    'E5', 'E5NF',
    'E6', 'E6NF',
    'E7NF',
    'E9', 'E9NF',
    'E10',
    'E11',
    'E12', 'E12NF',
));
391
392
393def _isValidOpcodeByte(sOpcode):
394 """
395 Checks if sOpcode is a valid lower case opcode byte.
396 Returns true/false.
397 """
398 if len(sOpcode) == 4:
399 if sOpcode[:2] == '0x':
400 if sOpcode[2] in '0123456789abcdef':
401 if sOpcode[3] in '0123456789abcdef':
402 return True;
403 return False;
404
405
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case hex strings (None means no lead bytes), sSelector a kdSelectors
        key, sEncoding a kdEncodings key and sDisParse either None or a name
        starting with 'IDX_Parse'.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from the value returned by oInstr.getOpcodeByte()
        according to the selector of this map.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        # The mod field is the top two bits (mask 0xc0), so mod == 0y11 means
        # the masked value equals 0xc0.  (Comparing against 0xc never matches.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit hex words are appended with a leading
        underscore, other words with the first letter upper cased.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':                        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':                      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
542
543
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        implementation always returns the single entry form, with the bytes
        in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  (int() handles arbitrarily large
        # values on both Python 2 and 3.)
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string without prefix.  Negative values
        # need to be manually converted to something non-negative (~-n + 1),
        # which is done by inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit of the leading digit is set.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Find the number of digits to pad to:  The first normal size that is
        # large enough, or a multiple of the largest one when none is.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad with zeros or, for negative values, with 'f's.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base type.
        """
        _ = sValue;
        return False;
668
669
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that stand for a cleared flag.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into one entry (flags to set) or, when any of
        the flags stands for a cleared flag, two entries (flags to clear
        followed by flags to set).

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any flag standing for a cleared flag, in
        which case get() returns a pair.

        The value is split into flags the same way get() does it.  A plain
        substring search would for instance find 'pl' inside 'iopl'.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
706
707
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created with the default fUnsigned=True and so
    ##               appears to behave exactly like 'uint' - confirm whether
    ##               fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:     ( default type, [both|input|output], )
        # Operands.
        'op1':      ( 'uint', 'both',   ), ## \@op1
        'op2':      ( 'uint', 'both',   ), ## \@op2
        'op3':      ( 'uint', 'both',   ), ## \@op3
        'op4':      ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':      ( 'efl',  'both',   ),
        'efl_undef': ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':       ( 'uint', 'both',   ),
        'cl':       ( 'uint', 'both',   ),
        'dl':       ( 'uint', 'both',   ),
        'bl':       ( 'uint', 'both',   ),
        'ah':       ( 'uint', 'both',   ),
        'ch':       ( 'uint', 'both',   ),
        'dh':       ( 'uint', 'both',   ),
        'bh':       ( 'uint', 'both',   ),
        'r8l':      ( 'uint', 'both',   ),
        'r9l':      ( 'uint', 'both',   ),
        'r10l':     ( 'uint', 'both',   ),
        'r11l':     ( 'uint', 'both',   ),
        'r12l':     ( 'uint', 'both',   ),
        'r13l':     ( 'uint', 'both',   ),
        'r14l':     ( 'uint', 'both',   ),
        'r15l':     ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':       ( 'uint', 'both',   ),
        'dx':       ( 'uint', 'both',   ),
        'cx':       ( 'uint', 'both',   ),
        'bx':       ( 'uint', 'both',   ),
        'sp':       ( 'uint', 'both',   ),
        'bp':       ( 'uint', 'both',   ),
        'si':       ( 'uint', 'both',   ),
        'di':       ( 'uint', 'both',   ),
        'r8w':      ( 'uint', 'both',   ),
        'r9w':      ( 'uint', 'both',   ),
        'r10w':     ( 'uint', 'both',   ),
        'r11w':     ( 'uint', 'both',   ),
        'r12w':     ( 'uint', 'both',   ),
        'r13w':     ( 'uint', 'both',   ),
        'r14w':     ( 'uint', 'both',   ),
        'r15w':     ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':      ( 'uint', 'both',   ),
        'edx':      ( 'uint', 'both',   ),
        'ecx':      ( 'uint', 'both',   ),
        'ebx':      ( 'uint', 'both',   ),
        'esp':      ( 'uint', 'both',   ),
        'ebp':      ( 'uint', 'both',   ),
        'esi':      ( 'uint', 'both',   ),
        'edi':      ( 'uint', 'both',   ),
        'r8d':      ( 'uint', 'both',   ),
        'r9d':      ( 'uint', 'both',   ),
        'r10d':     ( 'uint', 'both',   ),
        'r11d':     ( 'uint', 'both',   ),
        'r12d':     ( 'uint', 'both',   ),
        'r13d':     ( 'uint', 'both',   ),
        'r14d':     ( 'uint', 'both',   ),
        'r15d':     ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':      ( 'uint', 'both',   ),
        'rdx':      ( 'uint', 'both',   ),
        'rcx':      ( 'uint', 'both',   ),
        'rbx':      ( 'uint', 'both',   ),
        'rsp':      ( 'uint', 'both',   ),
        'rbp':      ( 'uint', 'both',   ),
        'rsi':      ( 'uint', 'both',   ),
        'rdi':      ( 'uint', 'both',   ),
        'r8':       ( 'uint', 'both',   ),
        'r9':       ( 'uint', 'both',   ),
        'r10':      ( 'uint', 'both',   ),
        'r11':      ( 'uint', 'both',   ),
        'r12':      ( 'uint', 'both',   ),
        'r13':      ( 'uint', 'both',   ),
        'r14':      ( 'uint', 'both',   ),
        'r15':      ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':   ( 'uint', 'both',   ),
        'oz.rdx':   ( 'uint', 'both',   ),
        'oz.rcx':   ( 'uint', 'both',   ),
        'oz.rbx':   ( 'uint', 'both',   ),
        'oz.rsp':   ( 'uint', 'both',   ),
        'oz.rbp':   ( 'uint', 'both',   ),
        'oz.rsi':   ( 'uint', 'both',   ),
        'oz.rdi':   ( 'uint', 'both',   ),
        'oz.r8':    ( 'uint', 'both',   ),
        'oz.r9':    ( 'uint', 'both',   ),
        'oz.r10':   ( 'uint', 'both',   ),
        'oz.r11':   ( 'uint', 'both',   ),
        'oz.r12':   ( 'uint', 'both',   ),
        'oz.r13':   ( 'uint', 'both',   ),
        'oz.r14':   ( 'uint', 'both',   ),
        'oz.r15':   ( 'uint', 'both',   ),
        # Special ones.
        'value.xcpt': ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a kdFields key and sOp one of kasOperators.  All four
        arguments must be strings.  sValue and sType are stored unchecked.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
840
841
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    ## The inner dictionaries map each valid value to an identifier string.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.

        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue a valid value for the variable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
918
919
920class InstructionTest(object):
921 """
922 Instruction test.
923 """
924
925 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
926 self.oInstr = oInstr; # type: InstructionTest
927 self.aoInputs = []; # type: list(TestInOut)
928 self.aoOutputs = []; # type: list(TestInOut)
929 self.aoSelectors = []; # type: list(TestSelector)
930
931 def toString(self, fRepr = False):
932 """
933 Converts it to string representation.
934 """
935 asWords = [];
936 if self.aoSelectors:
937 for oSelector in self.aoSelectors:
938 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
939 asWords.append('/');
940
941 for oModifier in self.aoInputs:
942 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
943
944 asWords.append('->');
945
946 for oModifier in self.aoOutputs:
947 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
948
949 if fRepr:
950 return '<' + ' '.join(asWords) + '>';
951 return ' '.join(asWords);
952
953 def __str__(self):
954 """ Provide string represenation. """
955 return self.toString(False);
956
957 def __repr__(self):
958 """ Provide unambigious string representation. """
959 return self.toString(True);
960
class Operand(object):
    """
    Instruction operand.
    """

    def __init__(self, sWhere, sType):
        """
        Creates an operand.  sWhere must be a g_kdOpLocations key and sType a
        g_kdOpTypes key.
        """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """
        Returns True if using some form of ModR/M encoding.

        This goes by the first letter of the type.  'V' and 'W' are included
        because the Vxx and Wxx types in g_kdOpTypes use IDX_UseModRM just
        like the 'E', 'G' and 'M' ones do.
        """
        return self.sType[0] in ['E', 'G', 'M', 'V', 'W'];
975
976
977
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything the parser collects about one instruction: the core
    specification, the implementation names, where it was found in the source
    and the raw intermediate input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Returns the set attributes as 'field=value' pairs separated by '; '.
        Fields whose value is None are left out.  The result is enclosed in
        angle brackets when fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            # The @opN tag parser pads the list with None for operands that
            # have not been specified (yet), so skip those slots.
            if oOperand is not None:
                aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        return ('<' + sRet + '>') if fRepr else sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        The '0x??' form yields the byte value, the '/r' form (single digit)
        yields the digit shifted left by three.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  The length is checked first so that too short values
        # end up in the 'unsupported' exception below rather than an IndexError.
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        None and empty lists give zero.  Each flag is translated to a constant
        name via g_kdEFlagsMnemonics and then to a value via g_kdX86EFlagsConstants.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant;
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1141
1142
## All the instructions.
## SimpleParser.addInstruction appends every instruction it creates here.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Filled in by SimpleParser.doneInstructionOne, which reports duplicate names as errors.
g_dAllInstructionsByStat = {}; # type: dict(str, Instruction)

## All the instructions indexed by function name (opfunction).
## Filled in by SimpleParser.doneInstructionOne; several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(str, list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1154
## Instruction maps.
## The keys are the map names accepted by the @opmaps tag; each value is the
## InstructionMap object with the same name.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 (0x80) is selected by the ModR/M reg field just like 0x81..0x83.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1214
1215
1216
class ParserException(Exception):
    """ Exception raised by SimpleParser.raiseError and SimpleParser.raiseCommentError. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1221
1222
1223class SimpleParser(object):
1224 """
1225 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1226 """
1227
    ## @name Parser state.
    ## Values for SimpleParser.iState.
    ## @{
    kiCode              = 0;    ##< The state the parser starts out in (see __init__).
    kiCommentMulti      = 1;    ##< NOTE(review): presumably 'inside a multi-line comment' - confirm against the parse loop.
    ## @}
1233
1234 def __init__(self, sSrcFile, asLines, sDefaultMap):
1235 self.sSrcFile = sSrcFile;
1236 self.asLines = asLines;
1237 self.iLine = 0;
1238 self.iState = self.kiCode;
1239 self.sComment = '';
1240 self.iCommentLine = 0;
1241 self.aoCurInstrs = [];
1242
1243 assert sDefaultMap in g_dInstructionMaps;
1244 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1245
1246 self.cTotalInstr = 0;
1247 self.cTotalStubs = 0;
1248 self.cTotalTagged = 0;
1249
1250 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1251 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1252 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1253 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1254 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1255 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1256 self.fDebug = True;
1257
1258 self.dTagHandlers = {
1259 '@opbrief': self.parseTagOpBrief,
1260 '@opdesc': self.parseTagOpDesc,
1261 '@opmnemonic': self.parseTagOpMnemonic,
1262 '@op1': self.parseTagOpOperandN,
1263 '@op2': self.parseTagOpOperandN,
1264 '@op3': self.parseTagOpOperandN,
1265 '@op4': self.parseTagOpOperandN,
1266 '@oppfx': self.parseTagOpPfx,
1267 '@opmaps': self.parseTagOpMaps,
1268 '@opcode': self.parseTagOpcode,
1269 '@openc': self.parseTagOpEnc,
1270 '@opfltest': self.parseTagOpEFlags,
1271 '@opflmodify': self.parseTagOpEFlags,
1272 '@opflundef': self.parseTagOpEFlags,
1273 '@opflset': self.parseTagOpEFlags,
1274 '@opflclear': self.parseTagOpEFlags,
1275 '@ophints': self.parseTagOpHints,
1276 '@opdisenum': self.parseTagOpDisEnum,
1277 '@opmincpu': self.parseTagOpMinCpu,
1278 '@opcpuid': self.parseTagOpCpuId,
1279 '@opgroup': self.parseTagOpGroup,
1280 '@opunused': self.parseTagOpUnusedInvalid,
1281 '@opinvalid': self.parseTagOpUnusedInvalid,
1282 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1283 '@optest': self.parseTagOpTest,
1284 '@optestign': self.parseTagOpTestIgnore,
1285 '@optestignore': self.parseTagOpTestIgnore,
1286 '@opcopytests': self.parseTagOpCopyTests,
1287 '@oponlytest': self.parseTagOpOnlyTest,
1288 '@opxcpttype': self.parseTagOpXcptType,
1289 '@opstats': self.parseTagOpStats,
1290 '@opfunction': self.parseTagOpFunction,
1291 '@opdone': self.parseTagOpDone,
1292 };
1293
1294 self.asErrors = [];
1295
1296 def raiseError(self, sMessage):
1297 """
1298 Raise error prefixed with the source and line number.
1299 """
1300 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1301
1302 def raiseCommentError(self, iLineInComment, sMessage):
1303 """
1304 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1305 """
1306 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1307
1308 def error(self, sMessage):
1309 """
1310 Adds an error.
1311 returns False;
1312 """
1313 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1314 return False;
1315
1316 def errorComment(self, iLineInComment, sMessage):
1317 """
1318 Adds a comment error.
1319 returns False;
1320 """
1321 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1322 return False;
1323
1324 def printErrors(self):
1325 """
1326 Print the errors to stderr.
1327 Returns number of errors.
1328 """
1329 if self.asErrors:
1330 sys.stderr.write(u''.join(self.asErrors));
1331 return len(self.asErrors);
1332
1333 def debug(self, sMessage):
1334 """
1335 For debugging.
1336 """
1337 if self.fDebug:
1338 print('debug: %s' % (sMessage,));
1339
1340
1341 def addInstruction(self, iLine = None):
1342 """
1343 Adds an instruction.
1344 """
1345 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1346 g_aoAllInstructions.append(oInstr);
1347 self.aoCurInstrs.append(oInstr);
1348 return oInstr;
1349
1350 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1351 """
1352 Derives the mnemonic and operands from a IEM stats base name like string.
1353 """
1354 if oInstr.sMnemonic is None:
1355 asWords = sStats.split('_');
1356 oInstr.sMnemonic = asWords[0].lower();
1357 if len(asWords) > 1 and not oInstr.aoOperands:
1358 for sType in asWords[1:]:
1359 if sType in g_kdOpTypes:
1360 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1361 else:
1362 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1363 return False;
1364 return True;
1365
1366 def doneInstructionOne(self, oInstr, iLine):
1367 """
1368 Complete the parsing by processing, validating and expanding raw inputs.
1369 """
1370 assert oInstr.iLineCompleted is None;
1371 oInstr.iLineCompleted = iLine;
1372
1373 #
1374 # Specified instructions.
1375 #
1376 if oInstr.cOpTags > 0:
1377 if oInstr.sStats is None:
1378 pass;
1379
1380 #
1381 # Unspecified legacy stuff. We generally only got a few things to go on here.
1382 # /** Opcode 0x0f 0x00 /0. */
1383 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1384 #
1385 else:
1386 #if oInstr.sRawOldOpcodes:
1387 #
1388 #if oInstr.sMnemonic:
1389 pass;
1390
1391 #
1392 # Common defaults.
1393 #
1394
1395 # Guess mnemonic and operands from stats if the former is missing.
1396 if oInstr.sMnemonic is None:
1397 if oInstr.sStats is not None:
1398 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1399 elif oInstr.sFunction is not None:
1400 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1401
1402 # Derive the disassembler op enum constant from the mnemonic.
1403 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1404 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1405
1406 # Derive the IEM statistics base name from mnemonic and operand types.
1407 if oInstr.sStats is None:
1408 if oInstr.sFunction is not None:
1409 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1410 elif oInstr.sMnemonic is not None:
1411 oInstr.sStats = oInstr.sMnemonic;
1412 for oOperand in oInstr.aoOperands:
1413 if oOperand.sType:
1414 oInstr.sStats += '_' + oOperand.sType;
1415
1416 # Derive the IEM function name from mnemonic and operand types.
1417 if oInstr.sFunction is None:
1418 if oInstr.sMnemonic is not None:
1419 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1420 for oOperand in oInstr.aoOperands:
1421 if oOperand.sType:
1422 oInstr.sFunction += '_' + oOperand.sType;
1423 elif oInstr.sStats:
1424 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1425
1426 # Derive encoding from operands.
1427 if oInstr.sEncoding is None:
1428 if not oInstr.aoOperands:
1429 oInstr.sEncoding = 'fixed';
1430 elif oInstr.aoOperands[0].usesModRM():
1431 if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
1432 oInstr.sEncoding = 'ModR/M+VEX';
1433 else:
1434 oInstr.sEncoding = 'ModR/M';
1435
1436 #
1437 # Apply default map and then add the instruction to all it's groups.
1438 #
1439 if not oInstr.aoMaps:
1440 oInstr.aoMaps = [ self.oDefaultMap, ];
1441 for oMap in oInstr.aoMaps:
1442 oMap.aoInstructions.append(oInstr);
1443
1444 #
1445 # Check the opstat value and add it to the opstat indexed dictionary.
1446 #
1447 if oInstr.sStats:
1448 if oInstr.sStats not in g_dAllInstructionsByStat:
1449 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1450 else:
1451 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1452 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1453
1454 #
1455 # Add to function indexed dictionary. We allow multiple instructions per function.
1456 #
1457 if oInstr.sFunction:
1458 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1459 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1460 else:
1461 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1462
1463 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1464 return True;
1465
1466 def doneInstructions(self, iLineInComment = None):
1467 """
1468 Done with current instruction.
1469 """
1470 for oInstr in self.aoCurInstrs:
1471 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1472 if oInstr.fStub:
1473 self.cTotalStubs += 1;
1474
1475 self.cTotalInstr += len(self.aoCurInstrs);
1476
1477 self.sComment = '';
1478 self.aoCurInstrs = [];
1479 return True;
1480
1481 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
1482 """
1483 Sets the sAttrib of all current instruction to oValue. If fOverwrite
1484 is False, only None values and empty strings are replaced.
1485 """
1486 for oInstr in self.aoCurInstrs:
1487 if fOverwrite is not True:
1488 oOldValue = getattr(oInstr, sAttrib);
1489 if oOldValue is not None:
1490 continue;
1491 setattr(oInstr, sAttrib, oValue);
1492
1493 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
1494 """
1495 Sets the iEntry of the array sAttrib of all current instruction to oValue.
1496 If fOverwrite is False, only None values and empty strings are replaced.
1497 """
1498 for oInstr in self.aoCurInstrs:
1499 aoArray = getattr(oInstr, sAttrib);
1500 while len(aoArray) <= iEntry:
1501 aoArray.append(None);
1502 if fOverwrite is True or aoArray[iEntry] is None:
1503 aoArray[iEntry] = oValue;
1504
1505 def parseCommentOldOpcode(self, asLines):
1506 """ Deals with 'Opcode 0xff /4' like comments """
1507 asWords = asLines[0].split();
1508 if len(asWords) >= 2 \
1509 and asWords[0] == 'Opcode' \
1510 and ( asWords[1].startswith('0x')
1511 or asWords[1].startswith('0X')):
1512 asWords = asWords[:1];
1513 for iWord, sWord in enumerate(asWords):
1514 if sWord.startswith('0X'):
1515 sWord = '0x' + sWord[:2];
1516 asWords[iWord] = asWords;
1517 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
1518
1519 return False;
1520
1521 def ensureInstructionForOpTag(self, iTagLine):
1522 """ Ensure there is an instruction for the op-tag being parsed. """
1523 if not self.aoCurInstrs:
1524 self.addInstruction(self.iCommentLine + iTagLine);
1525 for oInstr in self.aoCurInstrs:
1526 oInstr.cOpTags += 1;
1527 if oInstr.cOpTags == 1:
1528 self.cTotalTagged += 1;
1529 return self.aoCurInstrs[-1];
1530
1531 @staticmethod
1532 def flattenSections(aasSections):
1533 """
1534 Flattens multiline sections into stripped single strings.
1535 Returns list of strings, on section per string.
1536 """
1537 asRet = [];
1538 for asLines in aasSections:
1539 if asLines:
1540 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1541 return asRet;
1542
1543 @staticmethod
1544 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1545 """
1546 Flattens sections into a simple stripped string with newlines as
1547 section breaks. The final section does not sport a trailing newline.
1548 """
1549 # Typical: One section with a single line.
1550 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1551 return aasSections[0][0].strip();
1552
1553 sRet = '';
1554 for iSection, asLines in enumerate(aasSections):
1555 if asLines:
1556 if iSection > 0:
1557 sRet += sSectionSep;
1558 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1559 return sRet;
1560
1561
1562
1563 ## @name Tag parsers
1564 ## @{
1565
1566 def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
1567 """
1568 Tag: \@opbrief
1569 Value: Text description, multiple sections, appended.
1570
1571 Brief description. If not given, it's the first sentence from @opdesc.
1572 """
1573 oInstr = self.ensureInstructionForOpTag(iTagLine);
1574
1575 # Flatten and validate the value.
1576 sBrief = self.flattenAllSections(aasSections);
1577 if not sBrief:
1578 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
1579 if sBrief[-1] != '.':
1580 sBrief = sBrief + '.';
1581 if len(sBrief) > 180:
1582 return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
1583 offDot = sBrief.find('.');
1584 while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
1585 offDot = sBrief.find('.', offDot + 1);
1586 if offDot >= 0 and offDot != len(sBrief) - 1:
1587 return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
1588
1589 # Update the instruction.
1590 if oInstr.sBrief is not None:
1591 return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
1592 % (sTag, oInstr.sBrief, sBrief,));
1593 _ = iEndLine;
1594 return True;
1595
1596 def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
1597 """
1598 Tag: \@opdesc
1599 Value: Text description, multiple sections, appended.
1600
1601 It is used to describe instructions.
1602 """
1603 oInstr = self.ensureInstructionForOpTag(iTagLine);
1604 if aasSections:
1605 oInstr.asDescSections.extend(self.flattenSections(aasSections));
1606 return True;
1607
1608 _ = sTag; _ = iEndLine;
1609 return True;
1610
1611 def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
1612 """
1613 Tag: @opmenmonic
1614 Value: mnemonic
1615
1616 The 'mnemonic' value must be a valid C identifier string. Because of
1617 prefixes, groups and whatnot, there times when the mnemonic isn't that
1618 of an actual assembler mnemonic.
1619 """
1620 oInstr = self.ensureInstructionForOpTag(iTagLine);
1621
1622 # Flatten and validate the value.
1623 sMnemonic = self.flattenAllSections(aasSections);
1624 if not self.oReMnemonic.match(sMnemonic):
1625 return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
1626 if oInstr.sMnemonic is not None:
1627 return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
1628 % (sTag, oInstr.sMnemonic, sMnemonic,));
1629 oInstr.sMnemonic = sMnemonic
1630
1631 _ = iEndLine;
1632 return True;
1633
1634 def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
1635 """
1636 Tags: \@op1, \@op2, \@op3, \@op4
1637 Value: [where:]type
1638
1639 The 'where' value indicates where the operand is found, like the 'reg'
1640 part of the ModR/M encoding. See Instruction.kdOperandLocations for
1641 a list.
1642
1643 The 'type' value indicates the operand type. These follow the types
1644 given in the opcode tables in the CPU reference manuals.
1645 See Instruction.kdOperandTypes for a list.
1646
1647 """
1648 oInstr = self.ensureInstructionForOpTag(iTagLine);
1649 idxOp = int(sTag[-1]) - 1;
1650 assert idxOp >= 0 and idxOp < 4;
1651
1652 # flatten, split up, and validate the "where:type" value.
1653 sFlattened = self.flattenAllSections(aasSections);
1654 asSplit = sFlattened.split(':');
1655 if len(asSplit) == 1:
1656 sType = asSplit[0];
1657 sWhere = None;
1658 elif len(asSplit) == 2:
1659 (sWhere, sType) = asSplit;
1660 else:
1661 return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));
1662
1663 if sType not in g_kdOpTypes:
1664 return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
1665 % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
1666 if sWhere is None:
1667 sWhere = g_kdOpTypes[sType][1];
1668 elif sWhere not in g_kdOpLocations:
1669 return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
1670 % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));
1671
1672 # Insert the operand, refusing to overwrite an existing one.
1673 while idxOp >= len(oInstr.aoOperands):
1674 oInstr.aoOperands.append(None);
1675 if oInstr.aoOperands[idxOp] is not None:
1676 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
1677 % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
1678 sWhere, sType,));
1679 oInstr.aoOperands[idxOp] = Operand(sWhere, sType);
1680
1681 _ = iEndLine;
1682 return True;
1683
1684 def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
1685 """
1686 Tag: \@opmaps
1687 Value: map[,map2]
1688
1689 Indicates which maps the instruction is in. There is a default map
1690 associated with each input file.
1691 """
1692 oInstr = self.ensureInstructionForOpTag(iTagLine);
1693
1694 # Flatten, split up and validate the value.
1695 sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
1696 asMaps = sFlattened.split(',');
1697 if not asMaps:
1698 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
1699 for sMap in asMaps:
1700 if sMap not in g_dInstructionMaps:
1701 return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
1702 % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));
1703
1704 # Add the maps to the current list. Throw errors on duplicates.
1705 for oMap in oInstr.aoMaps:
1706 if oMap.sName in asMaps:
1707 return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));
1708
1709 for sMap in asMaps:
1710 oMap = g_dInstructionMaps[sMap];
1711 if oMap not in oInstr.aoMaps:
1712 oInstr.aoMaps.append(oMap);
1713 else:
1714 self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
1715
1716 _ = iEndLine;
1717 return True;
1718
1719 def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
1720 """
1721 Tag: \@oppfx
1722 Value: n/a|none|0x66|0xf3|0xf2
1723
1724 Required prefix for the instruction. (In a (E)VEX context this is the
1725 value of the 'pp' field rather than an actual prefix.)
1726 """
1727 oInstr = self.ensureInstructionForOpTag(iTagLine);
1728
1729 # Flatten and validate the value.
1730 sFlattened = self.flattenAllSections(aasSections);
1731 asPrefixes = sFlattened.split();
1732 if len(asPrefixes) > 1:
1733 return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
1734
1735 sPrefix = asPrefixes[0].lower();
1736 if sPrefix == 'none':
1737 sPrefix = 'none';
1738 elif sPrefix == 'n/a':
1739 sPrefix = None;
1740 else:
1741 if len(sPrefix) == 2:
1742 sPrefix = '0x' + sPrefix;
1743 if not _isValidOpcodeByte(sPrefix):
1744 return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));
1745
1746 if sPrefix is not None and sPrefix not in g_kdPrefixes:
1747 return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));
1748
1749 # Set it.
1750 if oInstr.sPrefix is not None:
1751 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
1752 oInstr.sPrefix = sPrefix;
1753
1754 _ = iEndLine;
1755 return True;
1756
1757 def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
1758 """
1759 Tag: \@opcode
1760 Value: 0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg
1761
1762 The opcode byte or sub-byte for the instruction in the context of a map.
1763 """
1764 oInstr = self.ensureInstructionForOpTag(iTagLine);
1765
1766 # Flatten and validate the value.
1767 sOpcode = self.flattenAllSections(aasSections);
1768 if sOpcode in g_kdSpecialOpcodes:
1769 pass;
1770 elif not _isValidOpcodeByte(sOpcode):
1771 return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));
1772
1773 # Set it.
1774 if oInstr.sOpcode is not None:
1775 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
1776 oInstr.sOpcode = sOpcode;
1777
1778 _ = iEndLine;
1779 return True;
1780
1781 def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
1782 """
1783 Tag: \@openc
1784 Value: ModR/M|fixed|prefix|<map name>
1785
1786 The instruction operand encoding style.
1787 """
1788 oInstr = self.ensureInstructionForOpTag(iTagLine);
1789
1790 # Flatten and validate the value.
1791 sEncoding = self.flattenAllSections(aasSections);
1792 if sEncoding in g_kdEncodings:
1793 pass;
1794 elif sEncoding in g_dInstructionMaps:
1795 pass;
1796 elif not _isValidOpcodeByte(sEncoding):
1797 return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));
1798
1799 # Set it.
1800 if oInstr.sEncoding is not None:
1801 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
1802 % ( sTag, oInstr.sEncoding, sEncoding,));
1803 oInstr.sEncoding = sEncoding;
1804
1805 _ = iEndLine;
1806 return True;
1807
    ## EFlags tag to Instruction attribute name.
    ## parseTagOpEFlags looks up the tag here and accesses the attribute using getattr and setattr.
    kdOpFlagToAttr = {
        '@opfltest':    'asFlTest',
        '@opflmodify':  'asFlModify',
        '@opflundef':   'asFlUndefined',
        '@opflset':     'asFlSet',
        '@opflclear':   'asFlClear',
    };
1816
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:      <eflags specifier>

    Stores the comma separated EFLAGS list in the instruction attribute
    that self.kdOpFlagToAttr associates with the tag.  The single value
    'none' (any case) yields an empty list.

    Returns True on success and False (or the self.errorComment() result)
    on invalid flags or when the attribute was already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Everything is joined with commas so a single split yields all values.
    sFlat   = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlat.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # Report every bad flag before bailing out.
        fOkay = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped; # Only surrounding whitespace was in the way.
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;

    # Each of the flag lists may only be set once.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
1848
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The single value 'none' (any case) means no hints.  Valid hints are
    added to oInstr.dHints; a hint that is already present is reported via
    self.errorComment() but does not fail the tag.

    Returns True on success and False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry.  The original code indexed the
                    # (immutable) string sHint here, which raises TypeError.
                    # Defensive only: split() already strips whitespace.
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1882
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.

    More than one word is reported as an error, after which the first word
    is still processed.  Returns True on success, False (or the
    self.errorComment() result) otherwise.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    # Check it against the OP_XXXX pattern.
    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # It can only be set once.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));

    oInstr.sDisEnum = sDisEnum;
    return True;
1911
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames; additional
    words are reported as an error but otherwise ignored.  Repeating the
    tag with the same CPU name is accepted, a different name is reported
    and the first value is kept.

    Returns True when a valid name was given, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # An empty value used to raise IndexError on asCpus[0] below.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The attribute must not be assigned during validation above,
    # or the overwrite check here could never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
1941
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list.  Valid flags are appended
    to oInstr.asCpuIds; a flag that is already present is reported via
    self.errorComment() but does not fail the tag.

    Returns True on success and False if any flag is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry.  The original code indexed the
                    # (immutable) string sCpuId here, which raises TypeError.
                    # Defensive only: split() already strips whitespace.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1975
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Sets the instruction grouping.

    Exactly one word matching self.oReGroupName is required and the group
    can only be set once.  Returns True on success, otherwise the result
    of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be exactly one group name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    # Check the name format.
    sGroup = asGroups[0];
    if self.oReGroupName.match(sGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing group.
    if oInstr.sGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
2001
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    The \@opunused tag indicates the specification is for a currently
    unused instruction encoding.

    The \@opinvalid tag indicates the specification is for a currently
    invalid instruction encoding (like UD2).

    The \@opinvlstyle tag just indicates how CPUs decode the instruction
    when not supported (\@opcpuid, \@opmincpu) or disabled.

    Only one of the three tags may be given per instruction.  Returns True
    on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be exactly one style, and it must be a known one.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # The style is shared by all three tags, so it can only be set once.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags additionally flag the instruction itself.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2039
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    Each section of the tag value describes one test.  The words of a
    section are sorted into selectors, inputs and outputs using the '/' and
    '->' separators, validated, and turned into TestSelector and TestInOut
    objects on a new InstructionTest.  The test is appended to
    oInstr.aoTests only if the whole section parsed without errors.

    Problems are reported via self.errorComment(); the method itself always
    returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # Walk the words back to front: outputs come last, selectors first.
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list must be compared
            # by identity; comparing by value misses a repeated separator
            # whenever the two lists happen to have equal content (for
            # instance when both are still empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An explicit ':type' suffix overrides the field's default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                         if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break; # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Only keep the test if everything in the section parsed fine.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2184
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    A simple trick for ignoring a test while debugging another one: the
    tag and its value are accepted and dropped.

    See also \@oponlytest.

    Always returns True.
    """
    _ = ( sTag, aasSections, iTagLine, iEndLine, );
    return True;
2196
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    The references are only recorded in oInstr.asCopyTests here; invalid
    and duplicate references are reported and skipped.  Returns True unless
    no reference was given at all.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The copy jobs are executed after all input has been parsed, so that
    # forward references can be handled.
    asToCopy = self.flattenAllSections(aasSections).split();
    if len(asToCopy) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        # A reference is either a statistics name or a function name.
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2225
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True on success; a non-empty value is an error and yields the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag doesn't take any value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue != '':
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the global only-test list, once.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2248
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value found in g_kdXcptTypes is required and the type can
    only be set once.  Returns True on success, otherwise the result of
    self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be exactly one type, and it must be a known one.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an existing type.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));

    oInstr.sXcptType = sType;
    return True;
2275
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or
    generated from the mnemonic and operands.

    It is thought that setting it explicitly may be necessary when
    specifying instructions whose implementation isn't following
    immediately or which aren't implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole value is the name and must match the function name pattern.
    sFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sFunction) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sFunction, self.oReFunctionName.pattern));

    # Refuse to replace an existing name.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));

    oInstr.sFunction = sFunction;
    return True;
2303
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or generated from the
    mnemonic and operands.

    It is thought that setting it explicitly may be necessary when
    specifying instructions whose implementation isn't following
    immediately or which aren't implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole value is the name and must match the statistics name pattern.
    sStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sStats) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sStats, self.oReStatsName.pattern));

    # Refuse to replace an existing name.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));

    oInstr.sStats = sStats;
    return True;
2331
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or that of
    self.errorComment() if the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2344
2345 ## @}
2346
2347
def parseComment(self):
    """
    Parses the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected (False).
    Otherwise the comment is split into lines with leading asterisks and
    whitespace removed, old style 'Opcode ...' comments are handed to
    self.parseCommentOldOpcode(), and each tag is dispatched with its
    sections to the matching handler in self.dTagHandlers.

    Returns True when the comment was processed.
    """
    #
    # Skip comments without anything of interest in them.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Clean up the lines and drop empty ones at either end.  Dropping
    # leading lines advances self.iCommentLine accordingly.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Collect and dispatch the tagged data.  Text preceding the first
    # tag goes under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so finish the previous one first.
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop(); # Drop the trailing empty section.
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Set up the new tag; text following it on the line starts
            # its first section.
            #
            asSplit      = sLine.split(None, 1);
            sCurTag      = asSplit[0].lower();
            asCurSection = asSplit[1:];
            aasSections  = [ asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Finish the last tag.
    # NOTE(review): unlike the loop above, no "Did you mean" hints are
    #               given for the final tag - confirm whether intended.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop();
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text and @op* tags must not be mixed in one comment.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2448
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    isn't found in it, following lines are taken from self.asLines,
    starting at index self.iLine, and appended until it is.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.  On a bad macro name or when running out of
    lines, the result of self.error() is returned instead.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.  Only commas at nesting depth one separate arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Append lines until there is a character at 'off'.  This must be a
        # loop: an appended line may be empty, and indexing right after a
        # single append would then raise IndexError.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2490
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed by an opening parenthesis (ignoring
    whitespace).

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Use startswith() rather than indexing: nothing at all may follow the
    # macro name, in which case indexing the stripped remainder would raise
    # IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2500
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx which drops the
    offset part of the result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2506
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    for which findAndParseMacroInvocation finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no macro after the first hit is tried.
    aoResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in aoResults if asArgs is not None), None);
2516
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last of
    the current instructions (one is added if there are none): mnemonic,
    operands, encoding, statistics name and hints.  Mismatches are reported
    via self.error().  sAsm is not used.

    Always returns True.
    """
    _ = sAsm;

    #
    # Sanity check the mnemonic parameters.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if self.oReMnemonic.match(sLower) is None:
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # The IEM hints can ask us to ignore the invocation altogether.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True;

    # Only the last instruction is updated for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Operands: either none were given by @opN, or the count must match.
    cKnownOperands = len(oInstr.aoOperands);
    if cKnownOperands != 0 and cKnownOperands != len(asOperands):
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: reuse the @opN operand if any, else a default.
            if iOperand < len(oInstr.aoOperands):
                oKnown = oInstr.aoOperands[iOperand];
                sWhere = oKnown.sWhere;
                sType  = oKnown.sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm in g_kdIemForms:
        oFormInfo = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oFormInfo[0];
        elif oFormInfo[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oFormInfo, sForm));

        # Check the operand locations against those the form prescribes.
        if oFormInfo[1] is not None:
            for iOperand, sWhere in enumerate(oFormInfo[1]):
                if oInstr.aoOperands[iOperand].sWhere != sWhere:
                    self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                               % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));

    # Stats.
    if self.oReStatsName.match(sStats) is None:
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Hints: simply merged with @ophints w/o checking anything.  Both hint
    # parameters are '|' separated lists of prefixed constants or '0'.
    for sHints, sPrefix, sFmtUnknown, sFmtExpected in (
            (sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s',),
            (sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s',), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if not sHint.startswith(sPrefix):
                if sHint != '0':
                    self.error(sFmtExpected % (sMacro, sHint,));
                continue;
            sShortHint = sHint[len(sPrefix):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error(sFmtUnknown % (sMacro, sHint,));

    return True;
2630
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # Without operands the mnemonic alone serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2640
def checkCodeForMacro(self, sCode):
    """
    Scans a piece of code for the macro invocations the parser cares about.

    Two families are recognized:
      - FNIEMOP_DEF / FNIEMOP_STUB* / FNIEMOP_UD_STUB*: records the decoder
        function name and the stub flags on the current instruction(s).
      - IEMOP_MNEMONIC, IEMOP_MNEMONIC[0-4]EX and IEMOP_MNEMONIC[0-4]:
        forwards the macro arguments to the matching worker.

    In the parsed argument lists, element 0 is the macro name and the
    macro arguments follow.

    Returns True if a FNIEMOP_XXX macro was processed, otherwise False
    (this includes the case where only IEMOP_MNEMONIC* macros were found).
    """
    # A macro invocation needs an opening parenthesis somewhere after the first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'FNIEMOP_DEF',
                                                            'FNIEMOP_STUB',
                                                            'FNIEMOP_STUB_1',
                                                            'FNIEMOP_UD_STUB',
                                                            'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sMacroName = asArgs[0];
        sFunction  = asArgs[1];

        # Make sure there is an instruction to attach the function to.
        if not self.aoCurInstrs:
            self.addInstruction();

        # Each current instruction may only see one FNIEMOP_XXX macro.
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub   = sMacroName.find('STUB') > 0;
        fUdStub = sMacroName.find('UD_STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', fUdStub, fOverwrite = True);

        # Stubs have no body to wait for, so the instruction(s) are complete right away.
        if fStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,] a_fDisHints, a_fIemHints)
    # The <n> operands sit between a_Lower and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%dEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], list(asArgs[6:iHints]));

    #
    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,] a_fDisHints, a_fIemHints)
    # Same layout as above, just without the leading a_Stats and a_szMnemonic.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%d' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], list(asArgs[4:iHints]));

    return False;
2734
2735
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are run through a small two-state machine
    (code vs. multi-line comment), starting at self.iLine:
      - Code fragments are handed to checkCodeForMacro().
      - The text of each multi-line comment is gathered in self.sComment
        and handed to parseComment() when the comment terminator is found.
      - A code line starting with '}' completes the current instructions.

    Returns number of errors (the value returned by printErrors()).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;  # Advance first; self.iLine is what the handlers below record as the line.

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            # A single line may switch state several times, so walk it piece by piece.
            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        # Code up to the comment start, then begin collecting a new comment.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # Comment complete: back to code state before processing it.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # Note! Using startswith() rather than sLine[0] so that an empty line
        #       string (no EOL char) does not raise IndexError.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Flush whatever is still pending at the end of the file and report.
    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2799
2800
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    The file is read into a list of lines which is then run through a
    SimpleParser instance created with sDefaultMap.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser are passed up unchanged; a ParserException is printed first.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # closes the file whether or not the read succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2831
2832
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    For every instruction with a non-empty asCopyTests list, each entry is
    looked up first in g_dAllInstructionsByStat and, failing that, in
    g_dAllInstructionsByFunction.  The tests (aoTests) of every instruction
    found are appended to those of the destination instruction.

    References that cannot be resolved, or that resolve to the destination
    instruction itself, are reported on stderr.

    Returns the number of errors reported.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A stats name identifies a single instruction, a function name possibly several.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
2860
2861
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, the aoTests list of every instruction in
    g_aoAllInstructions that is not in it is replaced with an empty one.
    When it is empty, nothing is touched.

    Returns 0 (no errors), always.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
2873
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are looked up in the directory of this script.  After the
    parsing, the test copying and the only-test filtering are applied.

    Returns True if no errors were counted.  Otherwise the process is
    terminated via sys.exit(1).  Exceptions raised by the parsing are not
    caught here.
    """
    sSrcDir  = os.path.dirname(os.path.abspath(__file__));
    atInputs = (
        # default map,  file name
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
    );

    # One error count per step, in execution order.
    acErrors = [ __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap) for sDefaultMap, sName in atInputs ];
    acErrors.append(__doTestCopying());
    acErrors.append(__applyOnlyTest());

    if sum(acErrors) != 0:
        # Exit with a failure status rather than raising an exception.
        sys.exit(1);
    return True;
2894
2895
2896
# Module-level call: the instruction sources are parsed whenever this module is
# loaded, i.e. both when it is imported and when it is run as a script.
__parseAll();
2898
2899
2900#
2901# Generators (may perhaps move later).
2902#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Walks the instruction maps in g_dInstructionMaps, sorted by encoding and
    lead opcodes, and writes a C 'const DISOPCODE <table>[]' initializer
    followed by an AssertCompile on the table size to oDstFile.

    Note! The loop currently stops after the first map (see the break at
          the end).

    oDstFile is the object the output is written to; only its write() method
    is used.  The default is the sys.stdout object as it was when this
    function was defined.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        # Table header.
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Minimum start offset of each column in an output line (indexed by column number).
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Every 16 entries: a blank separator line (except before the first
            # group) and a comment giving the upper bits of the index.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                # Empty table slot: nothing is emitted for it at present.
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Slot holding a list rather than a single instruction: placeholder only.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # The OP macro takes up to three operands, OPVEX is used for four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                # First column: the macro name, the opening parenthesis and the
                # quoted mnemonic + operand format string.  Element [2] of the
                # g_kdOpTypes entry supplies the operand text; it is upper
                # cased unless it starts with a percent sign.
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the parser index columns begin, so that
                # len(asColumns) - iStart is the number added so far.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    # One zero parser index per operand.
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    # The encoding names another instruction map: use that map's parser.
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Operands already covered by a column above are skipped by the
                # slice; element [0] of the g_kdOpTypes entry is the parser index.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad with zeros up to cMaxOperands parser index columns.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                # Element [3] of the g_kdOpTypes entry is the OP_PARM_ suffix.
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad with OP_PARM_NONE up to cMaxOperands parameter columns.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # OR together the defines of all hints that map to a DISOPTYPE_
                # define, or use 0 if there are none.  This column also closes
                # the macro invocation.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its offset in aoffColumns; when
                # the line is already at or past that offset a single space is
                # used as separator instead.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        # Table footer and a compile time check of the table size.
        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3066
# Script entry point: generate the disassembler tables using the default
# destination of generateDisassemblerTables (standard output).
if __name__ == '__main__':
    generateDisassemblerTables();
3069
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette