VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66314

Last change on this file since 66314 was 66314, checked in by vboxsync, 8 years ago

IEM: Use RTUINT128U instead of uint128_t; started on movsldup Vdq,Wdq.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 127.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66314 2017-03-28 21:28:34Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66314 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias the name to int; the value
# parsing code further down calls long() regardless of interpreter version.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant name -> 32-bit mask value.
## The trailing comment on each line gives the equivalent C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to an X86_EFL_XXX constant name.  A leading '!' on the
## constant name marks the mnemonic as meaning "this flag is clear".
##
## NOTE(review): 'po' maps to PF set and 'pe' to PF clear.  The usual debugger
##               notation is the other way around (PE when PF=1) - confirm
##               before relying on these two entries.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## \@op[1-4] locations
## Only the keys carry information here; every value is an empty list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
145
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
##
## NOTE(review): the 'Vdq' row carries '%Vpd' / 'Vpd' in fields 2 and 3 while
##               every other row (including 'Wdq') repeats its own name there;
##               this looks like a copy & paste leftover - confirm.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':   ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':   ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':   ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':   ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':  ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':  ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':  ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':  ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':  ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),

    # ModR/M.rm - register only.
    'Uq':   ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi': ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),

    # ModR/M.rm - memory only.
    'Ma':   ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mq':   ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),

    # ModR/M.reg
    'Gb':   ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':   ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':   ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':  ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':  ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':  ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':  ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':   ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq':  ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),

    # Immediate values.
    'Ib':   ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':   ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':   ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':   ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':   ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':   ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':   ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':   ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':   ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':   ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':   ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':   ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':   ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':   ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':   ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':   ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':  ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':   ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':   ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':   ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':   ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':   ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':   ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
223
224# IDX_ParseFixedReg
225# IDX_ParseVexDest
226
227
## IEMFORM_XXX mappings.
##
## Each value is a pair: the encoding name followed by the operand locations
## (g_kdOpLocations keys) in operand order.  'FIXED' has None instead of a
## location list.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':        ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':    ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M',     [ 'rm', ], ),
    'R':        ( 'ModR/M',     [ 'reg', ], ),
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ),
    'FIXED':    ( 'fixed',      None, )
};
244
## \@oppfx values.
## Only the keys matter here; every value is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
252
## Special \@opcode tag values.
## Only the keys matter here; every value is an empty list.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
262
## Special \@opcodesub tag values.
## Each value is a one-element list holding the canonical spelling, so the
## short '11' / '!11' aliases resolve to the long 'mr/reg' form and 'none'
## resolves to None.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
271
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX name, or None for
## 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
278
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys matter here; every value is an empty list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
287
## Known CPU model names.  Only the keys matter here; every value is an
## empty tuple.
## NOTE(review): presumably the set of values accepted for an instruction's
##               minimum CPU - confirm against the tag parser.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
295
## \@opcpuid
## Maps the feature name used in the tag to a X86_CPUID_XXX constant name.
##
## NOTE(review): three entries name a constant that does not match the key:
##               'pclmul' -> ..._ECX_DTES64, 'monitor' -> ..._ECX_CPLDS and
##               'smx' -> ..._ECX_TM2.  These look like they slipped by one
##               definition each - confirm against the x86 header.
g_kdCpuIdFlags = {
    'vme':       'X86_CPUID_FEATURE_EDX_VME',
    'tsc':       'X86_CPUID_FEATURE_EDX_TSC',
    'msr':       'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':       'X86_CPUID_FEATURE_EDX_CX8',
    'sep':       'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':      'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':     'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':       'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':      'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':       'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':      'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':      'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':    'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor':   'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':       'X86_CPUID_FEATURE_ECX_VMX',
    'smx':       'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':     'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':       'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':      'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':      'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':     'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':     'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':     'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':    'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':       'X86_CPUID_FEATURE_ECX_AES',
    'xsave':     'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':       'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':      'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':    'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':     'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':  'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':       'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':      'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':       'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':     'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':  'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':       'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':      'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
339
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name.  The last two hints
## ('ignores_op_size', 'lock_allowed') map to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
374
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys matter here; every value is an empty list.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
408
409
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode spells one opcode byte the way the maps expect it:
    exactly four characters, a '0x' prefix and two lower case hex digits.

    Returns True if it does, False otherwise.
    """
    sHexDigits = '0123456789abcdef';
    return len(sOpcode) == 4 \
       and sOpcode[:2] == '0x' \
       and all(ch in sHexDigits for ch in sOpcode[2:]);
421
422
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys matter; the values are empty lists.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sSelector must be a kdSelectors key and sEncoding a kdEncodings key.
        Each lead opcode must be a lower case '0xNN' string and sDisParse,
        when given, must start with 'IDX_Parse'.  Violations trip asserts.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  Asserts if the byte does not fit the selector.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        # modrm.mod occupies the top two bits, so mod == 0y11 means the masked
        # value is 0xc0.  (Comparing with 0xc can never match that mask.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' and two digit lower case hex words are appended with a
        leading underscore, anything else is appended with its first letter
        upper cased ('grp' and 'map' become 'Grp' and 'Map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
559
560
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the bare lower case hex digits of the non-negative iValue.
        """
        sHex = hex(iValue);
        assert sHex[:2] == '0x';
        # Python 2 appends 'L' when given a long; 'L' is never a hex digit.
        return sHex[2:].rstrip('L');

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number, optionally
        preceded by '+' or '-'.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class always returns the single entry form.  The bytearray holds the
        value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # try convert it to an integer.  int() has arbitrary precision on
        # Python 3 and promotes to long by itself on Python 2.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # Make sure the top bit is set so that padding with 'f' is valid.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits; values beyond the largest
        # size are rounded up to a multiple of the largest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        assert isinstance(cNaturalDigits, int)

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for plain integer values.
        """
        _ = sValue;
        return False;
685
686
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that stand for a cleared flag.  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated flag list into byte form.

        Returns the TestType.get() result for the mask of flags to set when
        no flag is to be cleared.  Otherwise returns a pair where the first
        entry is the mask of the flags to clear and the second the mask of
        the flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue names at least one flag that is to be cleared.

        Whole flags are compared; a substring search would mistake 'iopl'
        for the zero value flag 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
722
723
724
class TestInOut(object):
    """
    A single modifier of the input or the output state of a test.

    Think of it as a value to apply to BS3REGCTX and to the extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created with the default fUnsigned=True, which
    ##               makes it behave exactly like 'uint' - confirm intended.
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
    };
    ## CPU context fields.
    ## name: ( default type, [both|input|output], )
    ## Built group by group; the key order equals the order of the rows below.
    kdFields = dict(
        (sFieldName, ( sDefType, sDirection, ))
        for asFieldNames, sDefType, sDirection in (
            # Operands (\@op1 thru \@op4).
            ( ( 'op1', 'op2', 'op3', 'op4', ),                                              'uint', 'both', ),
            # Flags.
            ( ( 'efl', ),                                                                   'efl',  'both', ),
            ( ( 'efl_undef', ),                                                             'uint', 'output', ),
            # 8-bit GPRs.
            ( ( 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh', ),                          'uint', 'both', ),
            ( ( 'r8l', 'r9l', 'r10l', 'r11l', 'r12l', 'r13l', 'r14l', 'r15l', ),            'uint', 'both', ),
            # 16-bit GPRs.
            ( ( 'ax', 'dx', 'cx', 'bx', 'sp', 'bp', 'si', 'di', ),                          'uint', 'both', ),
            ( ( 'r8w', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w', ),            'uint', 'both', ),
            # 32-bit GPRs.
            ( ( 'eax', 'edx', 'ecx', 'ebx', 'esp', 'ebp', 'esi', 'edi', ),                  'uint', 'both', ),
            ( ( 'r8d', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d', ),            'uint', 'both', ),
            # 64-bit GPRs.
            ( ( 'rax', 'rdx', 'rcx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', ),                  'uint', 'both', ),
            ( ( 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15', ),                    'uint', 'both', ),
            # 16-bit, 32-bit or 64-bit registers according to operand size.
            ( ( 'oz.rax', 'oz.rdx', 'oz.rcx', 'oz.rbx',
                'oz.rsp', 'oz.rbp', 'oz.rsi', 'oz.rdi', ),                                  'uint', 'both', ),
            ( ( 'oz.r8', 'oz.r9', 'oz.r10', 'oz.r11',
                'oz.r12', 'oz.r13', 'oz.r14', 'oz.r15', ),                                  'uint', 'both', ),
            # Special ones.
            ( ( 'value.xcpt', ),                                                            'uint', 'output', ),
        )
        for sFieldName in asFieldNames
    );

    def __init__(self, sField, sOp, sValue, sType):
        """
        Stores the four strings making up the modifier.

        sField must be a kdFields key and sOp one of kasOperators, all four
        arguments must be str; asserts otherwise.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField, self.sOp, self.sValue, self.sType = sField, sOp, sValue, sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
857
858
class TestSelector(object):
    """
    A single selector restricting when an instruction test applies.

    A selector is a variable, a compare operator and one of the values
    that are valid for that variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the selector after asserting that sVariable is a kdVariables
        key, sOp one of kasCompareOps and sValue valid for the variable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
935
936
937class InstructionTest(object):
938 """
939 Instruction test.
940 """
941
942 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
943 self.oInstr = oInstr; # type: InstructionTest
944 self.aoInputs = []; # type: list(TestInOut)
945 self.aoOutputs = []; # type: list(TestInOut)
946 self.aoSelectors = []; # type: list(TestSelector)
947
948 def toString(self, fRepr = False):
949 """
950 Converts it to string representation.
951 """
952 asWords = [];
953 if self.aoSelectors:
954 for oSelector in self.aoSelectors:
955 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
956 asWords.append('/');
957
958 for oModifier in self.aoInputs:
959 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
960
961 asWords.append('->');
962
963 for oModifier in self.aoOutputs:
964 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
965
966 if fRepr:
967 return '<' + ' '.join(asWords) + '>';
968 return ' '.join(asWords);
969
970 def __str__(self):
971 """ Provide string represenation. """
972 return self.toString(False);
973
974 def __repr__(self):
975 """ Provide unambigious string representation. """
976 return self.toString(True);
977
class Operand(object):
    """
    Instruction operand.

    Pairs the place where the operand is found (a g_kdOpLocations key) with
    the operand type (a g_kdOpTypes key).
    """

    ## The leading operand type letters that usesModRM() accepts.
    kasModRMTypeLetters = ('E', 'G', 'M');

    def __init__(self, sWhere, sType):
        # Both values must be known ones; the offending value is the assert message.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst in self.kasModRMTypeLetters;
992
993
994
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the specification
    (mnemonic, operands, opcode, flags, ...), the implementation names, where
    in the source it was found, and some raw intermediate input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs, leaving
        out fields that are None (and a number of optional ones that are
        empty/False/unset).  When fRepr is True it is wrapped in '<' and '>'.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Only fields with a value are shown (identity test, not '!= None').
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  The length is checked first so that too short values
        # like '' and '/' end up as the unsupported-spec exception below
        # rather than as an IndexError.
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        and that via g_kdX86EFlagsConstants into a value, which are then
        OR'ed together.  None and empty lists give zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1159
1160
## All the instructions.
## SimpleParser.addInstruction appends every instruction it creates here.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## One instruction per name; SimpleParser.doneInstructionOne reports duplicates.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each value is a list since several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1172
## Instruction maps.
## Indexed by map name (the values usable with the @opmaps tag); the name is
## repeated as the first InstructionMap argument.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 is selected by the ModR/M reg field for all of 0x80 thru 0x83,
    # so 0x80 gets the same '/r' selector as the other three.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1232
1233
1234
class ParserException(Exception):
    """
    Parser exception.

    Carries a single, ready formatted message.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1239
1240
1241class SimpleParser(object):
1242 """
1243 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1244 """
1245
## @name Parser state.
## Values for self.iState, which the constructor initializes to kiCode.
## NOTE(review): going by the names, kiCode means ordinary code and
## kiCommentMulti means inside a multi-line comment - confirm against the
## parsing loop.
## @{
kiCode              = 0;
kiCommentMulti      = 1;
## @}
1251
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Prepares for parsing asLines, the content of sSrcFile.

    sDefaultMap is the name of the g_dInstructionMaps entry used for
    instructions which do not specify any map themselves.
    """
    # Input and position tracking.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The default map must be a known one.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Value validation patterns.  Macro, mnemonic and statistics names are
    # all plain C identifiers.
    sReIdentifier       = '[A-Za-z_][A-Za-z0-9_]*';
    self.oReMacroName   = re.compile('^' + sReIdentifier + '$');
    self.oReMnemonic    = re.compile('^' + sReIdentifier + '$');
    self.oReStatsName   = re.compile('^' + sReIdentifier + '$');
    self.oReFunctionName= re.compile('^iemOp_' + sReIdentifier + '$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };

    # Errors collected by error() and errorComment().
    self.asErrors       = [];
1314
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1320
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the line number reported is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine   = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,);
    raise ParserException(sFullMessage);
1326
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False, so callers can 'return self.error(...)'.
    """
    sError = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sError);
    return False;
1334
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment in self.asErrors,
    the line number being self.iCommentLine + iLineInComment.
    Always returns False, so callers can 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sError     = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sError);
    return False;
1342
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr in one go.
    Returns the number of errors.
    """
    if self.asErrors:
        sAllErrors = u''.join(self.asErrors);
        sys.stderr.write(sAllErrors);
    cErrors = len(self.asErrors);
    return cErrors;
1351
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix, provided self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1358
1359
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global instruction
    list and the list of current instructions.

    The instruction is created for line iLine, or for the current line when
    iLine is None.  Returns the new instruction.
    """
    iLineCreated = iLine if iLine is not None else self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLineCreated);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1368
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    first underscore separated word, lowercased, becomes the mnemonic.  If
    the instruction has no operands yet, each of the remaining words is
    added as an operand.

    Returns True on success and False upon hitting a word that is not a
    known operand type (operands added before that one are left in place).
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asTypes = asParts[1:];
    if asTypes and not oInstr.aoOperands:
        for sType in asTypes:
            if sType not in g_kdOpTypes:
                #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1384
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes one instruction: fills in what can be derived from what was
    specified, then registers the instruction with its maps and with the
    global by-stat and by-function dictionaries.

    iLine is recorded as the line the instruction was completed on.
    Returns True.  A duplicate statistics name is reported via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: There is no processing yet that is specific to instructions
    #       specified using @op tags (cOpTags > 0), nor to the unspecified
    #       legacy ones where there generally are only a few things to go on:
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)

    #
    # Common defaults.  Later steps use the results of earlier ones.
    #

    # Guess mnemonic and operands if the former is missing, preferably from
    # the stats name and otherwise from the function name.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # The disassembler op enum constant defaults to the uppercased mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name comes from the function name if there is
    # one, otherwise from the mnemonic and the operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The IEM function name comes from the mnemonic and operand types if
    # there is a mnemonic, otherwise from the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # The encoding is derived from the operands.  It is left unset when the
    # first operand does not use ModR/M.
    if oInstr.sEncoding is None:
        aoOperands = oInstr.aoOperands;
        if not aoOperands:
            oInstr.sEncoding = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
        elif aoOperands[0].usesModRM():
            fVexOperand = len(aoOperands) >= 2 and aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVexOperand else 'ModR/M';

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1487
1488 def doneInstructions(self, iLineInComment = None):
1489 """
1490 Done with current instruction.
1491 """
1492 for oInstr in self.aoCurInstrs:
1493 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1494 if oInstr.fStub:
1495 self.cTotalStubs += 1;
1496
1497 self.cTotalInstr += len(self.aoCurInstrs);
1498
1499 self.sComment = '';
1500 self.aoCurInstrs = [];
1501 return True;
1502
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all the current instructions to oValue.

    Unless fOverwrite is True (the boolean itself, not merely something
    true), only attributes currently being None are set.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1514
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue, padding arrays that are too short with None.

    Unless fOverwrite is True (the boolean itself), only entries currently
    being None are set.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1526
1527 def parseCommentOldOpcode(self, asLines):
1528 """ Deals with 'Opcode 0xff /4' like comments """
1529 asWords = asLines[0].split();
1530 if len(asWords) >= 2 \
1531 and asWords[0] == 'Opcode' \
1532 and ( asWords[1].startswith('0x')
1533 or asWords[1].startswith('0X')):
1534 asWords = asWords[:1];
1535 for iWord, sWord in enumerate(asWords):
1536 if sWord.startswith('0X'):
1537 sWord = '0x' + sWord[:2];
1538 asWords[iWord] = asWords;
1539 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
1540
1541 return False;
1542
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    If there is no current instruction, one is added for the tag line.
    The tag count of every current instruction is incremented, and the
    tagged-instruction total is bumped for each one getting its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1552
1553 @staticmethod
1554 def flattenSections(aasSections):
1555 """
1556 Flattens multiline sections into stripped single strings.
1557 Returns list of strings, on section per string.
1558 """
1559 asRet = [];
1560 for asLines in aasSections:
1561 if asLines:
1562 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1563 return asRet;
1564
1565 @staticmethod
1566 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1567 """
1568 Flattens sections into a simple stripped string with newlines as
1569 section breaks. The final section does not sport a trailing newline.
1570 """
1571 # Typical: One section with a single line.
1572 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1573 return aasSections[0][0].strip();
1574
1575 sRet = '';
1576 for iSection, asLines in enumerate(aasSections):
1577 if asLines:
1578 if iSection > 0:
1579 sRet += sSectionSep;
1580 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1581 return sRet;
1582
1583
1584
1585 ## @name Tag parsers
1586 ## @{
1587
1588 def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
1589 """
1590 Tag: \@opbrief
1591 Value: Text description, multiple sections, appended.
1592
1593 Brief description. If not given, it's the first sentence from @opdesc.
1594 """
1595 oInstr = self.ensureInstructionForOpTag(iTagLine);
1596
1597 # Flatten and validate the value.
1598 sBrief = self.flattenAllSections(aasSections);
1599 if not sBrief:
1600 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
1601 if sBrief[-1] != '.':
1602 sBrief = sBrief + '.';
1603 if len(sBrief) > 180:
1604 return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
1605 offDot = sBrief.find('.');
1606 while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
1607 offDot = sBrief.find('.', offDot + 1);
1608 if offDot >= 0 and offDot != len(sBrief) - 1:
1609 return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
1610
1611 # Update the instruction.
1612 if oInstr.sBrief is not None:
1613 return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
1614 % (sTag, oInstr.sBrief, sBrief,));
1615 _ = iEndLine;
1616 return True;
1617
1618 def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
1619 """
1620 Tag: \@opdesc
1621 Value: Text description, multiple sections, appended.
1622
1623 It is used to describe instructions.
1624 """
1625 oInstr = self.ensureInstructionForOpTag(iTagLine);
1626 if aasSections:
1627 oInstr.asDescSections.extend(self.flattenSections(aasSections));
1628 return True;
1629
1630 _ = sTag; _ = iEndLine;
1631 return True;
1632
1633 def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
1634 """
1635 Tag: @opmenmonic
1636 Value: mnemonic
1637
1638 The 'mnemonic' value must be a valid C identifier string. Because of
1639 prefixes, groups and whatnot, there times when the mnemonic isn't that
1640 of an actual assembler mnemonic.
1641 """
1642 oInstr = self.ensureInstructionForOpTag(iTagLine);
1643
1644 # Flatten and validate the value.
1645 sMnemonic = self.flattenAllSections(aasSections);
1646 if not self.oReMnemonic.match(sMnemonic):
1647 return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
1648 if oInstr.sMnemonic is not None:
1649 return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
1650 % (sTag, oInstr.sMnemonic, sMnemonic,));
1651 oInstr.sMnemonic = sMnemonic
1652
1653 _ = iEndLine;
1654 return True;
1655
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       @op1, @op2, @op3, @op4
    Value:      [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See g_kdOpLocations for a list.  When
    omitted, the default location of the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See g_kdOpTypes for a list.

    Returns True after setting the operand, or False after recording an
    error if the value is invalid or the operand is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The last character of the tag is the one-based operand number.
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Operands
    # skipped so far get None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1705
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Returns True after adding the maps to the instruction, or False after
    recording an error if a map name is unknown or already assigned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.  Lines and sections are joined using
    # commas too, so the value can be spread over several of them.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = sFlattened.split(',');
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    # All the names must be known; complain about the first that is not.
    asUnknown = [sMap for sMap in asMaps if sMap not in g_dInstructionMaps];
    if asUnknown:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, asUnknown[0], ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    asAlreadyThere = [oMap.sName for oMap in oInstr.aoMaps if oMap.sName in asMaps];
    if asAlreadyThere:
        return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, asAlreadyThere[0]));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap in oInstr.aoMaps:
            # Repeated within the value itself; reported, but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oMap);

    _ = iEndLine;
    return True;
1740
1741 def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
1742 """
1743 Tag: \@oppfx
1744 Value: n/a|none|0x66|0xf3|0xf2
1745
1746 Required prefix for the instruction. (In a (E)VEX context this is the
1747 value of the 'pp' field rather than an actual prefix.)
1748 """
1749 oInstr = self.ensureInstructionForOpTag(iTagLine);
1750
1751 # Flatten and validate the value.
1752 sFlattened = self.flattenAllSections(aasSections);
1753 asPrefixes = sFlattened.split();
1754 if len(asPrefixes) > 1:
1755 return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
1756
1757 sPrefix = asPrefixes[0].lower();
1758 if sPrefix == 'none':
1759 sPrefix = 'none';
1760 elif sPrefix == 'n/a':
1761 sPrefix = None;
1762 else:
1763 if len(sPrefix) == 2:
1764 sPrefix = '0x' + sPrefix;
1765 if not _isValidOpcodeByte(sPrefix):
1766 return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));
1767
1768 if sPrefix is not None and sPrefix not in g_kdPrefixes:
1769 return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));
1770
1771 # Set it.
1772 if oInstr.sPrefix is not None:
1773 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
1774 oInstr.sPrefix = sPrefix;
1775
1776 _ = iEndLine;
1777 return True;
1778
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcode
    Value:      0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    Returns True after setting the opcode, or False after recording an
    error if the value is invalid or the opcode is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value.  It must either be one of the special opcodes or
    # a valid opcode byte.
    sOpcode = self.flattenAllSections(aasSections);
    if sOpcode not in g_kdSpecialOpcodes and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it, refusing to overwrite.
    sOldOpcode = oInstr.sOpcode;
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
1802
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcodesub
    Value:      none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    Returns True after setting the sub opcode, or False after recording an
    error if the value is invalid or the sub opcode is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));

    # What gets stored is the first element of the table entry, not the
    # value as it was given.
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Set it, refusing to overwrite.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
1828
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  A value is accepted when it is a
    key of g_kdEncodings or g_dInstructionMaps, or when it passes the opcode
    byte validation; it is then stored in oInstr.sEncoding.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Validate the flattened value against the three accepted forms.
    sValue = self.flattenAllSections(aasSections)
    fKnown = sValue in g_kdEncodings \
          or sValue in g_dInstructionMaps \
          or _isValidOpcodeByte(sValue)
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sValue,))

    # Refuse to replace an encoding that was already specified.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % (sTag, oInstr.sEncoding, sValue,))

    oInstr.sEncoding = sValue
    return True
1855
## Maps each EFLAGS tag name to the name of the Instruction attribute that
#  parseTagOpEFlags stores the parsed flag list in.
kdOpFlagToAttr = {
    '@opfltest':   'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef':  'asFlUndefined',
    '@opflset':    'asFlSet',
    '@opflclear':  'asFlClear',
}
1864
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:      <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or the
    single word 'none' (any case) for an empty list.  The resulting list is
    stored in the Instruction attribute that kdOpFlagToAttr maps sTag to.

    Returns True on success; False or the result of errorComment otherwise.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Lines and sections are joined with commas, so one split covers it all.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',')
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = []
    else:
        fRc = True
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip()
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,))
        if not fRc:
            return False

    # Refuse to replace a flag list that was already specified for this tag.
    sAttrib = self.kdOpFlagToAttr[sTag]
    asOld = getattr(oInstr, sAttrib)
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, asOld, asFlags,))
    setattr(oInstr, sAttrib, asFlags)
    return True
1896
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Every value must be a key of g_kdHints; the single word 'none' (any case)
    stands for an empty list.  Accepted hints are added to oInstr.dHints.  A
    hint that is already present is reported via errorComment but does not
    fail the tag.

    Returns False when an invalid value was reported, True otherwise.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = []
    else:
        fRc = True
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Write the normalized value back into the list; the
                    # string itself does not support item assignment.
                    asHints[iHint] = sHint.strip()
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,))
        if not fRc:
            return False

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True  # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % (sTag, sHint,))

    return True
1930
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.
    The first word of the value must match self.oReDisEnum and is stored in
    oInstr.sDisEnum.  Surplus words are reported but parsing continues with
    the first one.

    Returns True on success; False or the result of errorComment otherwise.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Exactly one word is expected; with none at all there is nothing to check.
    asTokens = self.flattenAllSections(aasSections).split()
    if len(asTokens) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,))
        if not asTokens:
            return False

    sEnumValue = asTokens[0]
    if not self.oReDisEnum.match(sEnumValue):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sEnumValue, self.oReDisEnum.pattern))

    # Refuse to replace an enum value that was already specified.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sEnumValue,))

    oInstr.sDisEnum = sEnumValue
    return True
1959
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames.  It is stored
    in oInstr.sMinCpu unless a different value was set earlier, in which case
    the conflict is reported and the earlier value is kept.  Surplus words are
    reported but do not fail the tag.

    Returns True when a valid CPU name was given, otherwise the result of
    errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split()
    if not asCpus:
        # Without this guard the indexing below raises IndexError.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,))
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),))

    sMinCpu = asCpus[0]
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),))

    # Set it.  The attribute must not be touched before this point, or the
    # overwrite check below can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,))

    return True
1989
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    Every value must be a key of g_kdCpuIdFlags; the single word 'none' (any
    case) stands for an empty list.  Accepted values are appended to
    oInstr.asCpuIds.  A value that is already present is reported via
    errorComment but does not fail the tag.

    Returns False when an invalid value was reported, True otherwise.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = []
    else:
        fRc = True
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Write the normalized value back into the list; the
                    # string itself does not support item assignment.
                    asCpuIds[iCpuId] = sCpuId.strip()
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,))
        if not fRc:
            return False

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId)
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % (sTag, sCpuId,))

    return True
2023
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  The value must be a single word matching
    self.oReGroupName and is stored in oInstr.sGroup.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # There must be exactly one word and it must look like a group name.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,))
    sNewGroup = asWords[0]
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sNewGroup, self.oReGroupName.pattern))

    # Refuse to replace a group that was already specified.
    if oInstr.sGroup is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sGroup, sNewGroup,))

    oInstr.sGroup = sNewGroup
    return True
2049
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opunused, @opinvalid, @opinvlstyle
    Value:      <invalid opcode behaviour style>

    @opunused marks the specification as being for a currently unused
    instruction encoding and sets oInstr.fUnused.

    @opinvalid marks it as being for a currently invalid instruction encoding
    (like UD2) and sets oInstr.fInvalid.

    @opinvlstyle just indicates how CPUs decode the instruction when it is
    not supported (@opcpuid, @opmincpu) or disabled.

    All three store the style, which must be a key of g_kdInvalidStyles, in
    oInstr.sInvalidStyle; only one of them is accepted per instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # There must be exactly one word and it must be a known style.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,))
    sNewStyle = asWords[0]
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sNewStyle, g_kdInvalidStyles.keys(),))

    # Refuse to replace a style set by an earlier tag of this family.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % (sTag, oInstr.sInvalidStyle, sNewStyle,))
    oInstr.sInvalidStyle = sNewStyle

    # Two of the tags additionally flag the instruction itself.
    if sTag == '@opunused':
        oInstr.fUnused = True
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True

    return True
2087
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  A test that parses
    without a failure is appended to oInstr.aoTests, problems are reported
    via errorComment.  Always returns True.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,])
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections))
            continue
        oTest = InstructionTest(oInstr)

        asSelectors = []
        asInputs = []
        asOutputs = []
        asCur = asOutputs
        fRc = True
        asWords = sFlatSection.split()
        # Walk the words back to front: outputs come last, then '->', the
        # inputs, and optionally '/' preceded by the selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list must be compared
            # by identity: two lists with equal content (e.g. both still
            # empty) compare equal with '!=' and would hide a repeated or
            # misplaced separator.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asInputs
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asSelectors
            else:
                asCur.insert(0, sWord)

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands which expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond)
            oSelector = None
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp)
                if off >= 0:
                    sVariable = sCondExp[:off]
                    sValue = sCondExp[off + len(sOp):]
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue)
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),))
                    # Only the first operator found in the condition is considered.
                    break
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,))
                oTest.aoSelectors.append(oSelector)
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,))

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ]
            for sItem in asItems:
                oItem = None
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp)
                    if off < 0:
                        continue
                    sField = sItem[:off]
                    sValueType = sItem[off + len(sOp):]
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional ('value:type') and defaults to
                        # the one given for the field in kdFields.
                        asSplit = sValueType.split(':', 1)
                        sValue = asSplit[0]
                        sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue)
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType)
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ))
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ))
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                         if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),))
                    # Only the first operator found in the item is considered.
                    break
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,))
                    aoDst.append(oItem)
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,))

        #
        # Keep the test if everything parsed, otherwise dump what we have.
        #
        if fRc:
            oInstr.aoTests.append(oTest)
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),))
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,))

    return True
2232
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: the tag
    and its value are accepted and dropped.

    See also @oponlytest.  Always returns True.
    """
    # Nothing is parsed; just reference the arguments so they count as used.
    _ = (sTag, aasSections, iTagLine, iEndLine)
    return True
2244
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.  Each reference must match self.oReStatsName or
    self.oReFunctionName and is appended to oInstr.asCopyTests; invalid and
    duplicate references are reported but do not fail the tag.

    Returns True when at least one reference word was given, otherwise the
    result of errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Only the references are recorded here, which allows for forward
    # references to instructions which have not been parsed yet.
    asRefs = self.flattenAllSections(aasSections).split()
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,))

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,))
            continue
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef)
            continue
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                          % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern))

    return True
2273
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy for
    singling out one or two new instructions or tests.  The instruction is
    added to g_aoOnlyTestInstructions unless it is already in there.

    See also @optestignore.

    Returns True on success, or the result of errorComment if a value was
    given.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The tag does not take any value.
    sUnexpected = self.flattenAllSections(aasSections).strip()
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected))

    # Register the instruction once.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr)
    return True
2296
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).  The value must
    be a single key of g_kdXcptTypes and is stored in oInstr.sXcptType.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # There must be exactly one word and it must be a known exception type.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,))
    sNewType = asWords[0]
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),))

    # Refuse to replace an exception type that was already specified.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % (sTag, oInstr.sXcptType, sNewType,))

    oInstr.sXcptType = sNewType
    return True
2323
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    This may be necessary when specifying instructions whose implementation
    is not following immediately or which are not implemented yet.

    The value must match self.oReFunctionName and is stored in
    oInstr.sFunction.  Returns True on success, otherwise the result of
    errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The whole flattened value is taken as the name.
    sName = self.flattenAllSections(aasSections)
    if not self.oReFunctionName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReFunctionName.pattern))

    # Refuse to replace a function name that was already specified.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sName,))

    oInstr.sFunction = sName
    return True
2351
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up from
    the IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    This may be necessary when specifying instructions whose implementation
    is not following immediately or which are not implemented yet.

    The value must match self.oReStatsName and is stored in oInstr.sStats.
    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The whole flattened value is taken as the name.
    sName = self.flattenAllSections(aasSections)
    if not self.oReStatsName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReStatsName.pattern))

    # Refuse to replace a statistics name that was already specified.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sName,))

    oInstr.sStats = sName
    return True
2379
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of doneInstructions, or the result of errorComment if
    the tag was given a value.
    """
    _ = iEndLine
    sUnexpected = self.flattenAllSections(aasSections)
    if sUnexpected == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sUnexpected,))
2392
2393 ## @}
2394
2395
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away.
    Otherwise the comment is split into lines, an old style 'Opcode ...'
    comment is handed to parseCommentOldOpcode, and each '@tag' together with
    its text sections is dispatched to the matching self.dTagHandlers entry.
    Text before the first tag is collected under the pseudo tag '@default'.

    Returns False if the comment was rejected, True otherwise.
    """
    #
    # Nothing of interest without 'Opcode' or a tag marker.
    #
    if self.sComment.find('Opcode') < 0 and self.sComment.find('@') < 0:
        return False

    #
    # Break the comment into lines without the leading whitespace and
    # asterisk decoration, then drop empty lines at either end.  Dropping
    # leading lines advances self.iCommentLine.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')]

    while asLines and not asLines[0]:
        self.iCommentLine += 1
        del asLines[0]

    while asLines and not asLines[-1]:
        del asLines[-1]

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines)

    #
    # Look for @op* tagged data.  Lines are gathered into sections (separated
    # by empty lines) until the next tag line shows up.
    #
    cOpTags = 0
    sFlatDefault = None
    sCurTag = '@default'
    iCurTagLine = 0
    asCurSection = []
    aasSections = [asCurSection]
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so process the one collected so far.
            #
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1)
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine)
                cOpTags += 1
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag))
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections)
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag))
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,))

            #
            # Start collecting for the new tag; anything after the tag name
            # on the same line becomes the first line of its first section.
            #
            asSplit = sLine.split(None, 1)
            sCurTag = asSplit[0].lower()
            asCurSection = [asSplit[1]] if len(asSplit) > 1 else []
            aasSections = [asCurSection]
            iCurTagLine = iLine
        elif sLine:
            asCurSection.append(sLine)
        elif asCurSection:
            # An empty line closes a non-empty section.
            asCurSection = []
            aasSections.append(asCurSection)

    #
    # Process the final tag.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1)
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine)
        cOpTags += 1
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag))
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections)

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,))

    return True
2496
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the closing parenthesis is
    not found in it, further lines are appended from self.asLines, starting
    at index self.iLine.  Commas inside nested parentheses do not split
    arguments.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  An invalid macro name or running out of
    lines yields the result of self.error instead.
    """
    # First the name.
    offOpen = sInvocation.find('(')
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found")
    sName = sInvocation[:offOpen].strip()
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,))
    asRet = [sName, ]

    # Arguments.
    iLine = self.iLine
    cDepth = 1
    off = offOpen + 1
    offStart = off
    while cDepth > 0:
        # Pull in more lines until there is a character at 'off'.  This has
        # to loop, since an appended line may be empty and would otherwise
        # leave 'off' out of range for the indexing below.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file')
            sInvocation += self.asLines[iLine]
            iLine += 1
        ch = sInvocation[off]

        if ch in (',', ')'):
            # Only separators at the outermost level delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip())
                offStart = off + 1
            if ch == ')':
                cDepth -= 1
        elif ch == '(':
            cDepth += 1
        off += 1

    return (off, asRet)
2538
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation if the next non-blank character is an opening
    parenthesis.  When found, the returned offset is relative to the start
    of sCode.
    """
    offHit = sCode.find(sMacro)
    # startswith() copes with the macro name being the last thing in sCode,
    # where indexing the first character of the remainder would raise.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet)
    return (len(sCode), None)
2548
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Returns None if not found, arguments as per parseMacroInvocation if found.

    This is findAndParseMacroInvocationEx without the offset.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro)
    return tResult[1]
2554
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Returns same as findAndParseMacroInvocation.

    The macro names in asMacro are tried in order and the first one yielding
    a result wins; later names are not tried after a hit.
    """
    # The generator is lazy, so the search stops at the first hit.
    aoAttempts = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro)
    return next((asArgs for asArgs in aoAttempts if asArgs is not None), None)
2564
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty):
    mnemonic, operands, encoding (via g_kdIemForms), statistics name and
    hints.  All problems are reported via self.error without stopping.

    Nothing is applied when sIemHints contains IEMOPHINT_SKIP_PYTHON.
    sAsm is not used.  Always returns True.
    """
    _ = sAsm

    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,))
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,))
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,))
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,))

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction()
    oInstr = self.aoCurInstrs[-1]
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,))

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,))

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),))
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1]
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType))
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere
                sType = oInstr.aoOperands[iOperand].sType
            else:
                sWhere = 'reg'
                sType = 'Gb'
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,))

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,))
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0]
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm))

        # Check the parameter locations for the encoding.
        asFormLocations = g_kdIemForms[sForm][1]
        if asFormLocations is not None:
            if len(asFormLocations) > len(oInstr.aoOperands):
                # The form lists more locations than the instruction has
                # operands; report it instead of indexing past the end.
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormLocations), len(oInstr.aoOperands),))
            else:
                for iOperand, sWhere in enumerate(asFormLocations):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,))

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,))
    elif oInstr.sStats is None:
        oInstr.sStats = sStats
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,))

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint parameters are '|' separated lists of prefixed constants,
    # with a plain '0' being accepted as well.
    for sHintExpr, sPrefix, sParam in ((sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                       (sIemHints, 'IEMOPHINT_', 'a_fIemHints')):
        for sHint in sHintExpr.split('|'):
            sHint = sHint.strip()
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower()
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True  # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,))
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,))

    return True
2678
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from sLower and asOperands here before everything is
    handed on to workerIemOpMnemonicEx, whose return value is passed back.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        # Stats name: mnemonic and operands joined by underscores.
        sStats += '_' + '_'.join(asOperands);
        # Assembly string: mnemonic, a space, then comma separated operands.
        sAsm   += ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2688
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True when a FNIEMOP_XXX function definition/stub macro was found
    and processed.  Returns False in all other cases, including when one or
    more IEMOP_MNEMONIC* macros were processed.
    """
    # An invocation requires a '(' that is not the very first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sMacro, sFunction = asArgs[0], asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub = sMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # Stub macros close off the current instruction(s) right away.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    # Only considered when exactly one instruction is current.
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1..a_Op<n>, a_fDisHints, a_fIemHints)
    # asArgs[0] is the macro name, so the <n> operands occupy asArgs[6:6+n]
    # and the two hint arguments follow directly after them.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], list(asArgs[6:iHints]));

    #
    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, a_Op1..a_Op<n>, a_fDisHints, a_fIemHints)
    # Same layout minus the stats name and mnemonic string: operands are in
    # asArgs[4:4+n], followed by the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], list(asArgs[4:iHints]));

    return False;
2782
2783
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine.  Code outside of /* ... */
    comments is passed to checkCodeForMacro(), the text of each completed
    multi-line comment is collected in self.sComment and handed to
    parseComment(), and a '}' in the first column of a code line completes
    the current instructions.

    Returns number of errors (the result of printErrors()).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if '/' in sLine:
            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        # Code up to the comment start, then switch to comment mode.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # Comment complete: back to code mode before it gets parsed.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and 'IEMOP_' in sLine:
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # startswith() rather than sLine[0] so an empty line string cannot
        # raise IndexError.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2847
2848
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the result of SimpleParser.parse() for the file.  Failures to
    open or read the file are re-raised as Exception with the file name in
    the message; a ParserException is printed before being passed on.
    """
    # Slurp the source file into a line array.
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    # Do the parsing.  Any other exception type simply propagates.
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2879
2880
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each name listed in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.  The aoTests of every instruction found are
    appended to the referencing instruction's aoTests.

    Returns the number of errors; the error messages go to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A hit in the by-stat dictionary yields a single candidate,
            # otherwise use whatever the by-function dictionary has.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    # Copying an instruction's tests onto itself is reported as an error.
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
2908
2909
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0, which the caller adds to its error count.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
2921
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  After
    parsing, the test copying and only-test filtering passes are run.

    Returns True on success.  If any errors were counted the process is
    terminated with sys.exit(1) rather than by raising a regular exception.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default instruction map, source file name) pairs, parsed in this order.
    atSources = (
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    );

    cErrors  = sum(__parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap) for sDefaultMap, sName in atSources);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        sys.exit(1);
    return True;
2942
2943
2944
# Parse the instruction sources when this module is loaded; the generators
# further down work on the global tables this fills in.
__parseAll()
2946
2947
2948#
2949# Generators (may perhaps move later).
2950#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Writes a 'const DISOPCODE <table>[]' initializer with one OP()/OPVEX()
    entry per instruction, followed by an AssertCompile on the table size, to
    oDstFile.  The maps in g_dInstructionMaps are ordered by encoding plus
    lead opcodes; for now only the first one is written.
    """

    def mapSortKey(tNameAndMap):
        """ Sort key: map encoding followed by the concatenated lead opcodes. """
        oSortMap = tNameAndMap[1];
        return oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes);

    # The offset each column of a table entry is padded out to.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

    for sName, oMap in sorted(g_dInstructionMaps.items(), key = mapSortKey):
        assert oMap.sName == sName;

        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Every group of sixteen entries gets a heading comment, preceded
            # by a blank line for all but the first group.
            if iInstr % 16 == 0:
                if iInstr:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Nothing is emitted for empty slots; lists only get a placeholder.
            if oInstr is None:
                continue;
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            # More than three operands requires the OPVEX macro.
            fFourOperands = len(oInstr.aoOperands) > 3;
            sMacro        = 'OPVEX' if fFourOperands else 'OP';
            cMaxOperands  = 4 if fFourOperands else 3;
            assert len(oInstr.aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            asFmtOperands = [];
            for oOperand in oInstr.aoOperands:
                sFmtOperand = g_kdOpTypes[oOperand.sType][2];
                if sFmtOperand[0] != '%':               ## @todo remove upper() later.
                    sFmtOperand = sFmtOperand.upper();  ## @todo remove upper() later.
                asFmtOperands.append(sFmtOperand);
            if asFmtOperands:
                sFormat += ' ' + ','.join(asFmtOperands);
            sFormat += '",';

            #
            # Decoders.
            #
            asDecoders = [];
            sEncoding  = oInstr.sEncoding;
            if sEncoding is None or sEncoding == 'fixed':
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                asDecoders.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asDecoders.append('IDX_UseModRM,');
            elif sEncoding == 'prefix':
                asDecoders.extend(['0,'] * len(oInstr.aoOperands));
            elif sEncoding == 'vex2':
                asDecoders.append('IDX_ParseVex2b,');
            elif sEncoding == 'vex3':
                asDecoders.append('IDX_ParseVex3b,');
            elif sEncoding in g_dInstructionMaps:
                asDecoders.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
                #        IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
                #        IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                #        IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
                #        IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands, then
            # pad the decoder columns with zeros.
            for oOperand in oInstr.aoOperands[len(asDecoders):]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asDecoders.append(sIdx + ',');
            asDecoders.extend(['0,'] * (cMaxOperands - len(asDecoders)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
            asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

            #
            # Flags: the DISOPTYPE_ defines of the hints, or-ed together.
            #
            asFlags = [];
            for sHint in sorted(oInstr.dHints):
                sDefine = g_kdHints[sHint];
                if sDefine.startswith('DISOPTYPE_'):
                    asFlags.append(sDefine);
            sFlags = (' | '.join(asFlags) if asFlags else '0') + '),';

            #
            # Format the columns into a line.  Each column starts at its
            # offset, or one space after the previous one if that ran long.
            #
            asColumns = [ sFormat, ] + asDecoders + [ oInstr.sDisEnum + ',', ] + asParams + [ sFlags, ];
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1) + sColumn;

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #    DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #    { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3114
if __name__ == "__main__":
    # Run as a script: write the generated disassembler table to the
    # generator's default destination (sys.stdout).
    generateDisassemblerTables()
3117
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette