VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66334

Last change on this file since 66334 was 66334, checked in by vboxsync, 8 years ago

IEM: Fixed UD1 / grp 10 decoding (intel eats modr/m after it).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 128.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66334 2017-03-29 14:26:23Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66334 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks: there is no separate 'long' type there, so alias it to 'int'.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their numeric values.
## Looked up by TestTypeEflags.get() when converting flag mnemonics to masks.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Each mnemonic maps to an X86_EFL_XXX constant name.  A leading '!' on the
## constant name means the mnemonic stands for the flag being clear rather
## than set (see TestTypeEflags.get).
##
## NOTE(review): 'po' maps to PF set and 'pe' to PF clear below.  On x86 the
##               PF flag is set for even parity, so confirm this mapping
##               against the existing users before relying on the names.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## \@op[1-4] locations
## Only the keys carry information here; every value is an empty list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
145
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':       ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':       ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':       ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':       ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':      ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':      ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':      ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':      ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':      ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'WqZxReg':  ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':       ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':     ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),

    # ModR/M.rm - memory only.
    'Ma':       ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'MbRO':     ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Mq':       ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),

    # ModR/M.reg
    'Gb':       ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':       ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':       ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':      ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':      ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':      ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':      ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':       ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    # Was a copy of the 'Vpd' row; now follows the same pattern as 'Wdq'.
    'Vdq':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),

    # Immediate values.
    'Ib':       ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':       ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':       ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':       ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':       ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':       ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':       ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':       ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':       ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':       ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':       ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':       ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':       ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':       ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':       ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':       ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':      ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':       ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':       ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':       ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':       ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':       ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':       ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
225
226# IDX_ParseFixedReg
227# IDX_ParseVexDest
228
229
## IEMFORM_XXX mappings.
## Each value is the encoding name followed by the operand locations in
## operand order (None for the fixed form).
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':        ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':    ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M',     [ 'rm', ], ),
    'R':        ( 'ModR/M',     [ 'reg', ], ),
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ),
    'FIXED':    ( 'fixed',      None, ),
};
246
## \@oppfx values.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
254
## Special \@opcode tag values.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
264
## Special \@opcodesub tag values.
## The single list element is the canonical form the tag value stands for.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
273
## Valid values for \@openc
g_kdEncodings = {
    'ModR/M':   [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'fixed':    [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
    'prefix':   [ None, ],              ##< Prefix
};
280
## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
289
## CPU names.  Only the keys carry information; every value is an empty tuple.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
297
## \@opcpuid
## Maps the feature name used in the tag to the X86_CPUID_XXX constant name.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three (and 'smx' below) used to name the neighbouring feature
    # bits (DTES64, CPLDS and TM2) instead of the features themselves.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
342
## \@ophints values.
## Maps the hint name to the DISOPTYPE_XXX constant name; an empty string
## means the hint has no constant assigned in this table.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
377
## \@opxcpttype values (see SDMv2 2.4, 2.7).
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
411
412
def _isValidOpcodeByte(sOpcode):
    """
    Checks if sOpcode is a valid lower case opcode byte, i.e. exactly '0x'
    followed by two lower case hexadecimal digits (like '0x0f').
    Returns true/false.
    """
    return len(sOpcode) == 4 \
       and sOpcode[:2] == '0x' \
       and sOpcode[2] in '0123456789abcdef' \
       and sOpcode[3] in '0123456789abcdef';
424
425
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case hex strings (None means no lead bytes), sSelector a key from
        kdSelectors, sEncoding a key from kdEncodings and sDisParse an
        optional IDX_ParseXXX name.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  Asserts that the mod bits (top two bits) match
        what the '!11 /r', '11 /r' and '11' selectors require.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # The mod field is the top two bits, so mod == 0y11 reads as 0xc0
        # after masking.  (Comparing against 0xc can never match.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of the map
        name: 'm', 'r' and two digit hex words are appended as '_<word>', all
        other words get 'grp'/'map' capitalized and their first letter
        uppercased.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
566
567
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal string, optionally
        preceded by '+' or '-' (either sign forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second one is what to OR with.  The byte
        arrays are in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() handles arbitrarily large values
        # on both python 2 and 3, so the module level 'long' alias isn't needed.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except ValueError as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string ('%x' gives neither '0x' prefix nor 'L'
        # suffix).  Negative values needs to be manually converted to
        # something non-negative (~-n + 1): format (-n - 1) and invert every digit.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pad it to a decent size: the smallest normal size it fits into, or
        # else the nearest multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Floor division keeps this exact for any length (no float round trip).
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int);

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert (last digit pair first) and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
692
693
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that stand for a flag being clear.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns (clear-mask, set-mask) entries when at least one mnemonic
        means 'flag clear', otherwise just the (set-mask, ) tuple.  Each entry
        has the (fSignExtend, bytearray) form of TestType.get.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue));
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,));
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the 'flag clear' mnemonics, in which
        case get() returns a pair.

        Whole mnemonics are compared.  A substring search would wrongly match
        'pl' inside 'iopl' and trip the consistency assertion in get().
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
729
730
731
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created with the default fUnsigned=True, i.e. the
    ##               same as 'uint' - confirm whether that is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.  sField must be a kdFields key and sOp one of
        kasOperators; all four arguments must be strings.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
864
865
class TestSelector(object):
    """
    One selector for an instruction test.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.  sVariable must be a kdVariables key, sOp one of
        kasCompareOps and sValue a valid value for the variable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
942
943
944class InstructionTest(object):
945 """
946 Instruction test.
947 """
948
949 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
950 self.oInstr = oInstr; # type: InstructionTest
951 self.aoInputs = []; # type: list(TestInOut)
952 self.aoOutputs = []; # type: list(TestInOut)
953 self.aoSelectors = []; # type: list(TestSelector)
954
955 def toString(self, fRepr = False):
956 """
957 Converts it to string representation.
958 """
959 asWords = [];
960 if self.aoSelectors:
961 for oSelector in self.aoSelectors:
962 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
963 asWords.append('/');
964
965 for oModifier in self.aoInputs:
966 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
967
968 asWords.append('->');
969
970 for oModifier in self.aoOutputs:
971 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
972
973 if fRepr:
974 return '<' + ' '.join(asWords) + '>';
975 return ' '.join(asWords);
976
977 def __str__(self):
978 """ Provide string represenation. """
979 return self.toString(False);
980
981 def __repr__(self):
982 """ Provide unambigious string representation. """
983 return self.toString(True);
984
class Operand(object):
    """
    Instruction operand.

    Pairs the location of the operand (a g_kdOpLocations key) with its
    type (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType; ##< g_kdOpLocations key, g_kdOpTypes key.

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided by the first letter of the operand type alone.
        sFirst = self.sType[0];
        return sFirst in ('E', 'G', 'M');
999
1000
1001
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what the parser learns about one instruction: the core
    attributes set by the op-tags, implementation details, where in the
    source it was found and some raw intermediate input.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile and iLine say where the instruction was created; they are
        stored in sSrcFile and iLineCreated.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs for the
        fields that are set (not None).  When fRepr is True the list is
        wrapped in angle brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:     aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:     aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:   aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:    aasFields.append(['unused', 'True']);
        if self.fInvalid:   aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:      aasFields.append(['fStub', 'True']);
        if self.fUdStub:    aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:    aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Only fields with a value are included.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            sRet = '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        A '0x..' value is converted as a hexadecimal number, a '/r' value
        (slash and one digit) yields the digit shifted left by three bits.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  The length is checked first so that too short values
        # (like '' and '/') end up in the exception below instead of causing
        # an IndexError.
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag mnemonic is translated via g_kdEFlagsMnemonics into a
        constant name, which g_kdX86EFlagsConstants maps to the bit value.
        None and empty lists give zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1166
1167
## All the instructions.
## SimpleParser.addInstruction appends every instruction it creates here.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Filled in by SimpleParser.doneInstructionOne; a second instruction with the
## same statistics name is reported as an error there and not stored.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each value is a list since several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1179
## Instruction maps.
## Keyed by map name; the names are what the opmaps tag and the default map
## argument of SimpleParser refer to.
## Note: grp1_80 selects its members by the ModR/M reg field just like the
##       other group 1 maps (0x81, 0x82 and 0x83), so it uses the '/r' selector too.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',      asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',       asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',       asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',       asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',      asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',      asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',      asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',      asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',      asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',      asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',     asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',       asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38',  asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a',  asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',    sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',   sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',   sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',   sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',   sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',   sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',    sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',    sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',    sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',    sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',   sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1239
1240
1241
class ParserException(Exception):
    """
    Parser exception.

    Raised by SimpleParser.raiseError and SimpleParser.raiseCommentError
    with a message that is prefixed by the source file and line number.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1246
1247
class SimpleParser(object):
    """
    Parser of IEMAllInstruction*.cpp.h instruction specifications.
    """

    ## @name Parser state.
    ## Values for the iState member.
    ## @{
    kiCode = 0;          ##< The state a new parser starts out in (see __init__).
    kiCommentMulti = 1;  ##< NOTE(review): presumably 'inside a multi-line comment'; the code using it is not in this excerpt - confirm.
    ## @}
1258
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up a parser for one source file.

    sSrcFile is the name used when reporting errors, asLines is stored as
    given and sDefaultMap must be a key of g_dInstructionMaps; that map is
    applied to instructions that end up without any map of their own.
    """
    # The default map must exist.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Input and current position / state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # Statistics and collected errors.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.asErrors       = [];

    # Debug output is on by default.
    self.fDebug         = True;

    # Value validation expressions.
    self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');

    # Tag name to handler method.  Several tags share a handler, which is
    # why the handlers get the tag name as their first argument.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
1321
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1327
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1333
def error(self, sMessage):
    """
    Adds an error for the current line (self.iLine) to self.asErrors.
    Always returns False.
    """
    sError = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sError);
    return False;
1341
def errorComment(self, iLineInComment, sMessage):
    """
    Adds an error for a line inside the current comment to self.asErrors;
    the reported line is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sError = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sError);
    return False;
1349
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1358
def debug(self, sMessage):
    """
    For debugging: prints sMessage with a 'debug: ' prefix, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1365
1366
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the current instruction
    list and g_aoAllInstructions.

    The instruction is created for line iLine, or for the current line
    (self.iLine) when iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1375
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    the first underscore separated word, lowercased, becomes the mnemonic
    and, if the instruction has no operands yet, each following word is
    looked up in g_kdOpTypes and added as an operand.

    Returns False when hitting a word that isn't a known operand type (the
    operands added up to that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            # Unknown operand type.  Reporting this as an error is disabled,
            # so we just quietly give up.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1391
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line of oInstr, fills in attributes
    that were not given explicitly (mnemonic, operands, disassembler enum,
    statistics name, function name and encoding), applies the default map
    if needed and registers the instruction with its maps and with the
    g_dAllInstructionsByStat and g_dAllInstructionsByFunction dictionaries.

    The order of the derivation steps matters, since later steps use the
    results of earlier ones.

    Asserts that the instruction has not been completed before.
    Always returns True; a duplicate statistics name is reported via
    self.error() without affecting the return value.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    # Note: both branches below are currently placeholders doing nothing.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The statistics name is preferred, with the function name sans the
    # 'iemOp_' prefix as fallback.  The result of the guessing is ignored.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    # (The function name, when present, takes precedence over the mnemonic.)
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    # (Falls back on the statistics name when there is no mnemonic.)
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.
    # Note: the encoding stays None when there are operands and the first
    #       one doesn't use ModR/M.
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'ModR/M';
            else:
                oInstr.sEncoding = 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
                oInstr.sEncoding = 'ModR/M+VEX';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction using a name keeps the dictionary entry.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1494
def doneInstructions(self, iLineInComment = None):
    """
    Done with current instruction.

    Completes every current instruction via doneInstructionOne, updates
    the stub and instruction totals and then resets the current comment
    and the current instruction list.

    The completion line is self.iLine, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oCurInstr in self.aoCurInstrs:
        self.doneInstructionOne(oCurInstr, iLineDone);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset for the next instruction.
    self.aoCurInstrs = [];
    self.sComment    = '';
    return True;
1509
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instruction to oValue.

    Unless fOverwrite is exactly True, only attributes that are currently
    None are replaced; any other existing value is left alone.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1521
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets the iEntry of the array sAttrib of all current instruction to oValue.

    The array is padded with None entries if it is too short.  Unless
    fOverwrite is exactly True, only entries that are None are replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoArray  = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1533
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a word with a '0x' or '0X' prefix, the words following 'Opcode' are
    stored, space separated, as sRawOldOpcodes on the current instructions
    that don't have that attribute set yet.  Any '0X' prefix is normalized
    to '0x'.

    Always returns False.
    """
    # Be nice about empty comments.
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and (   asWords[1].startswith('0x')
           or asWords[1].startswith('0X')):
        # Drop the 'Opcode' word and keep the opcode bytes and whatever follows them.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1549
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (at comment line iTagLine) if there are no
    current ones, then bumps the op-tag count of every current instruction
    and the total number of tagged instructions for those getting their
    first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1559
1560 @staticmethod
1561 def flattenSections(aasSections):
1562 """
1563 Flattens multiline sections into stripped single strings.
1564 Returns list of strings, on section per string.
1565 """
1566 asRet = [];
1567 for asLines in aasSections:
1568 if asLines:
1569 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1570 return asRet;
1571
1572 @staticmethod
1573 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1574 """
1575 Flattens sections into a simple stripped string with newlines as
1576 section breaks. The final section does not sport a trailing newline.
1577 """
1578 # Typical: One section with a single line.
1579 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1580 return aasSections[0][0].strip();
1581
1582 sRet = '';
1583 for iSection, asLines in enumerate(aasSections):
1584 if asLines:
1585 if iSection > 0:
1586 sRet += sSectionSep;
1587 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1588 return sRet;
1589
1590
1591
1592 ## @name Tag parsers
1593 ## @{
1594
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    missing final period is added.  The brief is stored in oInstr.sBrief
    and may only be set once per instruction.

    Returns True on success, False after reporting an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first period followed by a space.  Finding one means
    # there is more than one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1624
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each section is flattened into
    a single string and appended to oInstr.asDescSections.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1639
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The mnemonic may only be set once per instruction.
    Returns True on success, False after reporting an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic expression.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to overwrite.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
1662
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See g_kdOpLocations for a list.  When
    omitted, the default location given by the g_kdOpTypes entry for the
    type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See g_kdOpTypes for a list.

    The operand number is taken from the last character of the tag.  An
    operand may only be set once per instruction.

    Returns True on success, False after reporting an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    # (Not yet specified operands preceding it are represented by None.)
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1712
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The map names are g_dInstructionMaps keys.  Whitespace around the
    names and empty list entries are ignored.

    Returns False after reporting a missing value, an unknown map or a
    map already assigned to the instruction, otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];     # Note! ''.split(',') returns [''], not [].
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was given twice in the value; report it and carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1747
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive and a two character value gets '0x'
    prepended.  'n/a' leaves the prefix unset (None).  The prefix may only
    be set once per instruction.

    Returns True on success, False after reporting an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1785
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    In the '/reg' form, reg is a single digit in the range 0 thru 7.  The
    opcode may only be set once per instruction.

    Returns True on success, False after reporting an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fRegForm = len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1] in '01234567'; # The reg field is 3 bits wide.
    if not fRegForm and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
1811
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcodesub
    Value:      none | 11 mr/reg | !11 mr/reg

    Shortcut for encodings where mod=3 and mod!=3 select exactly two
    different instructions.  Going via maps with two members would be the
    more proper way, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; element zero of the
    matching entry is what gets stored in the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Look the flattened value up in the table of known sub opcodes.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Store it unless the instruction already has one.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;
    return True;
1837
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte().

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value (checks are tried in this order).
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted = sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Store it unless the instruction already has one.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % ( sTag, oInstr.sEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;
    return True;
1864
## Maps each EFLAGS tag to the name of the Instruction attribute that
## parseTagOpEFlags reads and assigns for it.
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
1873
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flag names, each of which must be
    a key of g_kdEFlagsMnemonics (surrounding whitespace is tolerated), or
    the single word 'none' for an empty list.  The list is stored in the
    instruction attribute self.kdOpFlagToAttr selects for the tag.

    Returns True on success, False / self.errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten into one comma separated string and split it up again.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # Validate each entry, complaining about all the bad ones before bailing.
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Store the list, each tag may only be given once per instruction.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
1905
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints, or the whole value must be the
    single word 'none'.  Valid hints are added as keys to oInstr.dHints;
    duplicates are reported but do not fail the tag.

    Returns True on success, False if any hint was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Write the cleaned value back into the list (the string itself is immutable).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1939
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    When more than one word is given this is reported, but the first word
    is still used.  The word must match self.oReDisEnum.

    Returns True on success, False / self.errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split; complain unless there is exactly one word.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Store it unless the instruction already has one.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;
    return True;
1968
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames.  Extra words
    are reported but otherwise ignored.  A conflicting value for an
    instruction that already has a minimum CPU is reported and the existing
    value is kept.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # A tag without any value would otherwise raise IndexError on asCpus[0] below.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (The value must not be assigned before this check, or a
    # conflicting second tag could never be detected.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
1998
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list whose entries must be keys
    of g_kdCpuIdFlags, or the single word 'none'.  Valid entries are
    appended to oInstr.asCpuIds; duplicates are reported but do not fail
    the tag.

    Returns True on success, False if any entry was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Write the cleaned value back into the list (the string itself is immutable).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2033 def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
2034 """
2035 Tag: \@opgroup
2036 Value: op_grp1[_subgrp2[_subsubgrp3]]
2037
2038 Instruction grouping.
2039 """
2040 oInstr = self.ensureInstructionForOpTag(iTagLine);
2041
2042 # Flatten as a space separated list, split it up and validate the values.
2043 asGroups = self.flattenAllSections(aasSections).split();
2044 if len(asGroups) != 1:
2045 return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
2046 sGroup = asGroups[0];
2047 if not self.oReGroupName.match(sGroup):
2048 return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
2049 % (sTag, sGroup, self.oReGroupName.pattern));
2050
2051 # Set it.
2052 if oInstr.sGroup is not None:
2053 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
2054 oInstr.sGroup = sGroup;
2055
2056 _ = iEndLine;
2057 return True;
2058
2059 def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
2060 """
2061 Tag: \@opunused, \@opinvalid, \@opinvlstyle
2062 Value: <invalid opcode behaviour style>
2063
2064 The \@opunused indicates the specification is for a currently unused
2065 instruction encoding.
2066
2067 The \@opinvalid indicates the specification is for an invalid currently
2068 instruction encoding (like UD2).
2069
2070 The \@opinvlstyle just indicates how CPUs decode the instruction when
2071 not supported (\@opcpuid, \@opmincpu) or disabled.
2072 """
2073 oInstr = self.ensureInstructionForOpTag(iTagLine);
2074
2075 # Flatten as a space separated list, split it up and validate the values.
2076 asStyles = self.flattenAllSections(aasSections).split();
2077 if len(asStyles) != 1:
2078 return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
2079 sStyle = asStyles[0];
2080 if sStyle not in g_kdInvalidStyles:
2081 return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
2082 % (sTag, sStyle, g_kdInvalidStyles.keys(),));
2083 # Set it.
2084 if oInstr.sInvalidStyle is not None:
2085 return self.errorComment(iTagLine,
2086 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
2087 % ( sTag, oInstr.sInvalidStyle, sStyle,));
2088 oInstr.sInvalidStyle = sStyle;
2089 if sTag == '@opunused':
2090 oInstr.fUnused = True;
2091 elif sTag == '@opinvalid':
2092 oInstr.fInvalid = True;
2093
2094 _ = iEndLine;
2095 return True;
2096
2097 def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
2098 """
2099 Tag: \@optest
2100 Value: [<selectors>[ ]?] <inputs> -> <outputs>
2101 Example: mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
2102
2103 The main idea here is to generate basic instruction tests.
2104
2105 The probably simplest way of handling the diverse input, would be to use
2106 it to produce size optimized byte code for a simple interpreter that
2107 modifies the register input and output states.
2108
2109 An alternative to the interpreter would be creating multiple tables,
2110 but that becomes rather complicated wrt what goes where and then to use
2111 them in an efficient manner.
2112 """
2113 oInstr = self.ensureInstructionForOpTag(iTagLine);
2114
2115 #
2116 # Do it section by section.
2117 #
2118 for asSectionLines in aasSections:
2119 #
2120 # Sort the input into outputs, inputs and selector conditions.
2121 #
2122 sFlatSection = self.flattenAllSections([asSectionLines,]);
2123 if not sFlatSection:
2124 self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
2125 continue;
2126 oTest = InstructionTest(oInstr);
2127
2128 asSelectors = [];
2129 asInputs = [];
2130 asOutputs = [];
2131 asCur = asOutputs;
2132 fRc = True;
2133 asWords = sFlatSection.split();
2134 for iWord in range(len(asWords) - 1, -1, -1):
2135 sWord = asWords[iWord];
2136 # Check for array switchers.
2137 if sWord == '->':
2138 if asCur != asOutputs:
2139 fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
2140 break;
2141 asCur = asInputs;
2142 elif sWord == '/':
2143 if asCur != asInputs:
2144 fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
2145 break;
2146 asCur = asSelectors;
2147 else:
2148 asCur.insert(0, sWord);
2149
2150 #
2151 # Validate and add selectors.
2152 #
2153 for sCond in asSelectors:
2154 sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
2155 oSelector = None;
2156 for sOp in TestSelector.kasCompareOps:
2157 off = sCondExp.find(sOp);
2158 if off >= 0:
2159 sVariable = sCondExp[:off];
2160 sValue = sCondExp[off + len(sOp):];
2161 if sVariable in TestSelector.kdVariables:
2162 if sValue in TestSelector.kdVariables[sVariable]:
2163 oSelector = TestSelector(sVariable, sOp, sValue);
2164 else:
2165 self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
2166 % ( sTag, sValue, sCond,
2167 TestSelector.kdVariables[sVariable].keys(),));
2168 else:
2169 self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
2170 % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
2171 break;
2172 if oSelector is not None:
2173 for oExisting in oTest.aoSelectors:
2174 if oExisting.sVariable == oSelector.sVariable:
2175 self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
2176 % ( sTag, oSelector.sVariable, oExisting, oSelector,));
2177 oTest.aoSelectors.append(oSelector);
2178 else:
2179 fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));
2180
2181 #
2182 # Validate outputs and inputs, adding them to the test as we go along.
2183 #
2184 for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
2185 asValidFieldKinds = [ 'both', sDesc, ];
2186 for sItem in asItems:
2187 oItem = None;
2188 for sOp in TestInOut.kasOperators:
2189 off = sItem.find(sOp);
2190 if off < 0:
2191 continue;
2192 sField = sItem[:off];
2193 sValueType = sItem[off + len(sOp):];
2194 if sField in TestInOut.kdFields \
2195 and TestInOut.kdFields[sField][1] in asValidFieldKinds:
2196 asSplit = sValueType.split(':', 1);
2197 sValue = asSplit[0];
2198 sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
2199 if sType in TestInOut.kdTypes:
2200 oValid = TestInOut.kdTypes[sType].validate(sValue);
2201 if oValid is True:
2202 if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
2203 oItem = TestInOut(sField, sOp, sValue, sType);
2204 else:
2205 self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
2206 % ( sTag, sDesc, sItem, ));
2207 else:
2208 self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
2209 % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
2210 else:
2211 self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
2212 % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
2213 else:
2214 self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
2215 % ( sTag, sDesc, sField, sItem,
2216 ', '.join([sKey for sKey in TestInOut.kdFields.keys()
2217 if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
2218 break;
2219 if oItem is not None:
2220 for oExisting in aoDst:
2221 if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
2222 self.errorComment(iTagLine,
2223 '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
2224 % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
2225 aoDst.append(oItem);
2226 else:
2227 fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));
2228
2229 #
2230 # .
2231 #
2232 if fRc:
2233 oInstr.aoTests.append(oTest);
2234 else:
2235 self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
2236 self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
2237 % (sTag, asSelectors, asInputs, asOutputs,));
2238
2239 _ = iEndLine;
2240 return True;
2241
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    A simple trick for ignoring a test while debugging another one: the
    tag and whatever follows it is accepted and dropped.

    See also \@oponlytest.

    Always returns True.
    """
    # Nothing to do; just mark all the parameters as used.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2253
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid and duplicate references
    are reported but do not fail the tag.

    Returns True, or the self.errorComment() result if no reference was given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only the references are recorded here; per the original design the
    # copying is executed after all input is parsed to allow forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if len(asToCopy) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                              % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2282
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest
    Value:      none

    Only test instructions with this tag.  Handy trick for singling out one
    or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already in there.  The tag must not have any value.

    See also \@optestignore.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Register the instruction once.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
2305
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word is required; it must be a key of g_kdXcptTypes and the
    type may only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on a single, known type.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    sType, = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Store it unless the instruction already has one.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;
    return True;
2332
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation following the description, or it
    is generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation doesn't follow immediately or isn't written yet.

    The value must match self.oReFunctionName and can only be set once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name.
    sFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sFunction) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sFunction, self.oReFunctionName.pattern));

    # Store it unless the instruction already has one.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;
    return True;
2360
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation doesn't follow immediately or isn't written yet.

    The value must match self.oReStatsName and can only be set once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name.
    sStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sStats) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sStats, self.oReStatsName.pattern));

    # Store it unless the instruction already has one.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;
    return True;
2388
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() if the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2401
2402 ## @}
2403
2404
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away.
    Otherwise the comment is split into lines, an old style 'Opcode ...'
    first line is handed to self.parseCommentOldOpcode(), and each '@tag'
    with the text following it is dispatched to its handler in
    self.dTagHandlers.

    Returns False if the comment was rejected, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines, bumping the comment
    # line number for each leading one dropped.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text before the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so process the previous one first.
            #
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Set up the new tag; anything after the tag word opens its first section.
            #
            asTagAndRest = sLine.split(None, 1);
            sCurTag      = asTagAndRest[0].lower();
            asCurSection = [asTagAndRest[1],] if len(asTagAndRest) > 1 else [];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2505
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the closing parenthesis is
    not within it, more text is pulled in from self.asLines starting at
    index self.iLine (neither member is modified).

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    On failure the problem is reported via self.error() and the second
    tuple element is None, so callers unpacking the result keep working.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): raiseError() presumably raises; the return is a safety net should it not.
        self.raiseError("macro invocation open parenthesis not found");
        return (len(sInvocation), None);
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in following lines until there is a character at 'off'.  This
        # must loop as an appended line can be empty.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Arguments are only split at the outermost parenthesis level.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2547
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if an opening parenthesis follows the name (whitespace
    in between is allowed).

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset made relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Note: startswith() rather than indexing, as the name may be the very last thing in sCode.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oRet = self.parseMacroInvocation(sCode[offHit:]);
        # Only unpack real results; a failed parse is treated like 'not found'.
        if isinstance(oRet, tuple):
            offAfter, asRet = oRet;
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2557
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that drops
    the offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tOffAndArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tOffAndArgs[1];
2563
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    for which findAndParseMacroInvocation() finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so names after the first hit are never tried.
    for asArgs in (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro):
        if asArgs is not None:
            return asArgs;
    return None;
2573
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty):
    mnemonic, operands, encoding (via g_kdIemForms), statistics name and
    hints.  Mismatches are reported via self.error() without stopping.

    Nothing is applied if sIemHints contains IEMOPHINT_SKIP_PYTHON.

    Always returns True.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if self.oReMnemonic.match(sLower) is None:
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.  Either none were given by @opN tags, or the counts must agree.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: fall back on the @opN operand, or on a fixed default.
            if iOperand < len(oInstr.aoOperands):
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        else:
            oExisting = oInstr.aoOperands[iOperand];
            if oExisting.sWhere != sWhere or oExisting.sType != sType:
                self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                           % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                              oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm in g_kdIemForms:
        oForm = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oForm[0];
        elif oForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oForm, sForm));

        # Check the parameter locations for the encoding.
        asFormWhere = oForm[1];
        if asFormWhere is not None:
            if len(asFormWhere) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));

    # Stats.
    if self.oReStatsName.match(sStats) is None:
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in [sPart.strip() for sPart in sDisHints.split('|')]:
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in [sPart.strip() for sPart in sIemHints.split('|')]:
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    return True;
2691
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Handles the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These short forms carry neither a stats name nor an assembly string, so
    both are built here from sLower and asOperands before the work is handed
    over to workerIemOpMnemonicEx:
        - stats name:  sLower followed by '_' + each operand ('add_Gv_Ev'),
        - asm string:  sLower, a space, and the comma separated operands.
    Without operands both are simply sLower.

    Returns whatever workerIemOpMnemonicEx returns.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats += '_' + '_'.join(asOperands);
        sAsm   += ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2701
def checkCodeForMacro(self, sCode):
    """
    Scans a piece of code for the macro invocations the parser cares about.

    Code without a '(' after its first character is ignored outright.

    Returns True if a FNIEMOP_XXX function definition macro was found and
    processed.  Returns False otherwise, which includes the case where one
    or more IEMOP_MNEMONIC* macros were found and processed.
    """
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asFnMacros = [ 'FNIEMOP_DEF', 'FNIEMOP_STUB', 'FNIEMOP_STUB_1', 'FNIEMOP_UD_STUB', 'FNIEMOP_UD_STUB_1' ];
    asArgs = self.findAndParseFirstMacroInvocation(sCode, asFnMacros);
    if asArgs is not None:
        sFnMacro  = asArgs[0];
        sFunction = asArgs[1];
        fStub     = sFnMacro.find('STUB') > 0;

        # Make sure there is an instruction to attach the function to.
        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sFnMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sFnMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # Stubs have no body, so the instruction(s) are complete here.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,]
    #                     a_fDisHints, a_fIemHints)
    #
    # asArgs[0] is the macro name; the <n> operands sit at index 6 and up,
    # immediately followed by the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%dEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], list(asArgs[6:iHints]));

    #
    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,] a_fDisHints, a_fIemHints)
    #
    # Same layout as above, except that the operands start at index 4.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%d' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], list(asArgs[4:iHints]));

    return False;
2795
2796
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine, tracking whether we are in
    code or inside a multi-line comment.  Code fragments are handed to
    checkCodeForMacro(), each completed comment to parseComment(), and a
    line starting with '}' while in code state completes the current
    instructions.

    Returns number of errors (the printErrors() result).
    Raises AssertionError if the parser state is invalid; exceptions raised
    by the helper methods are passed on to the caller.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        # Code up to the comment start, then switch to comment state.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # Comment complete; the rest of the line is code again.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    # Raise explicitly rather than 'assert False': asserts are
                    # removed by python -O, and since offLine is not advanced
                    # here this loop would then never terminate.
                    raise AssertionError('invalid parser state: %s' % (self.iState,));

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than sLine[0] so an empty line cannot raise IndexError.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2860
2861
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count from SimpleParser.parse().
    Raises Exception if sSrcFile cannot be opened or read.  Exceptions from
    the parser are passed on to the caller; a ParserException is printed
    before it is re-raised.
    """
    #
    # Read sSrcFile into a line array, closing the file whether or not the
    # read succeeds.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Any other exception simply propagates.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2892
2893
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry of an instruction is looked up first in
    g_dAllInstructionsByStat and, failing that, in g_dAllInstructionsByFunction.
    The tests of every instruction found are appended to the destination
    instruction's aoTests.  A reference that is not found, or that resolves to
    the destination itself, is reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A stats name hit takes precedence over a function name hit.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
2921
2922
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list is non-empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in the list.
    Does nothing when the list is empty.

    Returns 0 (the error count; this step cannot fail).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
2934
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files next to this script, then
    runs the test copying and only-test post-processing steps.

    Returns True on success.  If any step reports errors the process is
    terminated via sys.exit(1).  Exceptions from the parsing itself are not
    caught here.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default instruction map, source file name)
    atInputs = [
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    cErrors = 0;
    for sDefaultMap, sName in atInputs:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        sys.exit(1);
    return True;
2955
2956
2957
# Parse the instruction source files as soon as this module is loaded (this
# also happens on import).  __parseAll() exits the process if errors are found.
__parseAll();
2959
2960
2961#
2962# Generators (may perhaps move later).
2963#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are sorted by encoding and lead
    opcodes.  For the time being only the first one is turned into a
    'const DISOPCODE <table>[]' definition followed by an AssertCompile on the
    table size, and written to oDstFile.

    Each ordinary instruction becomes one OP(...) row (OPVEX(...) when it has
    more than three operands) made up of: format string, decoder indexes,
    opcode enum, operand parameters and DISOPTYPE_XXX flags.
    """

    # Offsets the individual columns of a table row are padded out to.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

    # Encodings that map directly onto a single decoder index.
    dVexDecoders = { 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', };

    def mapSortKey(tNameAndMap):
        """ Sort key: map encoding followed by the lead opcodes. """
        oSortMap = tNameAndMap[1];
        return oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes);

    for sName, oMap in sorted(g_dInstructionMaps.items(), key = mapSortKey):
        assert oMap.sName == sName;

        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Every 16 entries get a heading, separated from the previous group by a blank line.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Empty slots produce no output at all.
            if oInstr is None:
                continue;

            # Slots holding several instructions are not handled yet, just mark them.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr,));
                continue;

            cOperands = len(oInstr.aoOperands);
            if cOperands > 3:
                sMacro       = 'OPVEX';
                cMaxOperands = 4;
            else:
                sMacro       = 'OP';
                cMaxOperands = 3;
            assert cOperands <= cMaxOperands;

            #
            # Format string: mnemonic, a space, and the comma separated operand formats.
            #
            asOpFormats = [];
            for oOperand in oInstr.aoOperands:
                sOpFormat = g_kdOpTypes[oOperand.sType][2];
                if sOpFormat[0] != '%':             ## @todo remove upper() later.
                    sOpFormat = sOpFormat.upper();
                asOpFormats.append(sOpFormat);
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asOpFormats:
                sFormat += ' ' + ','.join(asOpFormats);
            asColumns = [ sFormat + '",', ];

            #
            # Decoders.
            #
            iStart    = len(asColumns);
            sEncoding = oInstr.sEncoding;
            if sEncoding is None:
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,');
            elif sEncoding == 'prefix':
                # One zero decoder per operand.
                asColumns.extend(['0,'] * len(oInstr.aoOperands));
            elif sEncoding == 'fixed':
                pass;
            elif sEncoding in dVexDecoders:
                asColumns.append(dVexDecoders[sEncoding]);
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
                #        IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
                #        IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                #        IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
                #        IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands, i.e. those
            # that did not get a decoder column above.
            for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            # Pad the decoder columns up to cMaxOperands.
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands, the latter padded up to cMaxOperands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            asParams = [ 'OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands ];
            asColumns.extend(asParams);
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

            #
            # Flags: the DISOPTYPE_XXX defines of the hints in sorted hint order, or 0 if none.
            #
            asDefines = [ g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys()) ];
            sFlags    = ' | '.join([ sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_') ]);
            asColumns.append((sFlags if sFlags else '0') + '),');

            #
            # Format the columns into a line.  Each column starts at its offset in
            # aoffColumns, or after a single space if the line is already past that.
            #
            # Example and the macro it expands through:
            #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #      DISOPTYPE_HARMLESS),
            #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                offColumn = aoffColumns[iColumn];
                if len(sLine) < offColumn:
                    sLine = sLine.ljust(offColumn);
                else:
                    sLine += ' ';
                sLine += sColumn;

            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3127
# When run as a script, write the disassembler tables to the default
# destination of generateDisassemblerTables (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3130
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle | Support | Privacy / Do Not Sell My Info | Terms of Use | Trademark Policy | Automated Access Etiquette