VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66323

Last change on this file since 66323 was 66323, checked in by vboxsync, 8 years ago

IEM: Implemented movq Wq,Vq (66 0f d6).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 127.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66323 2017-03-29 08:03:19Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66323 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias it to int to keep the
# long(...) calls further down working on both major versions.
if sys.version_info >= (3,):
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant name -> numeric value.
## Single flags are written as bit shifts (bit number == shift count).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),     # two-bit field
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,                # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to an X86_EFL_XXX constant name.  A leading '!' on the
## constant name marks the mnemonic as meaning "this flag is clear".
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): common debugger notation uses 'pe' for PF set and 'po' for
    # PF clear, which is the opposite of the mapping here - confirm before use.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## \@op[1-4] locations
## Each location starts out with its own empty list as value.
g_kdOpLocations = {
    sLocation: []
    for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
145
146## \@op[1-4] types
147##
148## Value fields:
149## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
150## - 1: the location (g_kdOpLocations).
151## - 2: disassembler format string version of the type.
152## - 3: disassembler OP_PARAM_XXX (XXX only).
153##
154## Note! See the A.2.1 in SDM vol 2 for the type names.
155g_kdOpTypes = {
156 # Fixed addresses
157 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),
158
159 # ModR/M.rm
160 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
161 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
162 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
163 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
164 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
165 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
166 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
167 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
168 'WqZxReg': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
169
170 # ModR/M.rm - register only.
171 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
172 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
173
174 # ModR/M.rm - memory only.
175 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
176 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
177
178 # ModR/M.reg
179 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
180 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
181 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
182 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
183 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
184 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
185 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
186 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
187 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
188
189 # Immediate values.
190 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
191 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
192 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
193 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
194 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
195 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword
196
197 # Address operands (no ModR/M).
198 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
199 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),
200
201 # Relative jump targets
202 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
203 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),
204
205 # DS:rSI
206 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
207 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
208 # ES:rDI
209 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
210 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),
211
212 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),
213
214 # Fixed registers.
215 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
216 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
217 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
218 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
219 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
220 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
221 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
222 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
223};
224
225# IDX_ParseFixedReg
226# IDX_ParseVexDest
227
228
## IEMFORM_XXX mappings.
##
## Each value is a pair: the encoding name followed by the list of operand
## locations in operand order (None for the fixed form).
g_kdIemForms = {
    # ModR/M forms with the reg operand first.
    'RM':       ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M', [ 'reg', 'rm' ], ),
    # ModR/M forms with the rm operand first.
    'MR':       ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M', [ 'rm', 'reg' ], ),
    # Single operand ModR/M forms.
    'M':        ( 'ModR/M', [ 'rm', ], ),
    'M_REG':    ( 'ModR/M', [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M', [ 'rm', ], ),
    'R':        ( 'ModR/M', [ 'reg', ], ),
    # Three operand forms using VEX.vvvv.
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm' ], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg' ], ),
    # No operand locations.
    'FIXED':    ( 'fixed', None, ),
};
245
## \@oppfx values.
## Every prefix gets its own empty list as value.
g_kdPrefixes = { sPrefix: [] for sPrefix in ( 'none', '0x66', '0xf3', '0xf2', ) };
253
## Special \@opcode tag values.
## Every tag value gets its own empty list as value.
g_kdSpecialOpcodes = {
    sSpecial: []
    for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
263
## Special \@opcodesub tag values.
## The short '11' and '!11' spellings are aliases carrying the same value as
## the corresponding full 'mr/reg' spelling.
g_kdSubOpcodes = {
    'none':         [ None, ],
    '11 mr/reg':    [ '11 mr/reg', ],
    '11':           [ '11 mr/reg', ],       ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ],      ##< alias
};
272
## Valid values for \@openc
## The first list entry is a BS3CG1ENC_XXX constant name, or None for prefixes.
g_kdEncodings = {
    'ModR/M':   [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'fixed':    [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':   [ None, ],                  ##< Prefix
};
279
## \@opunused, \@opinvalid, \@opinvlstyle
## Every style gets its own empty list as value.
g_kdInvalidStyles = {
    sStyle: []
    for sStyle in (
        'immediate',                ##< CPU stops decoding immediately after the opcode.
        'intel-modrm',              ##< Intel decodes ModR/M.
        'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
        'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
    )
};
288
## Known CPU names; each maps to an empty tuple.
g_kdCpuNames = { sCpuName: () for sCpuName in ( '8086', '80186', '80286', '80386', '80486', ) };
296
## \@opcpuid
## Maps a feature name to the name of its X86_CPUID_XXX feature constant.
g_kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    # The next entries used to name the constant of a different feature
    # (DTES64, CPLDS and TM2 respectively); each now names its own feature.
    # NOTE(review): constant names taken from iprt/x86.h - confirm they exist
    # in the tree this is applied to.
    'pclmul':   'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':  'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD extended features.
    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
340
## \@ophints values.
## Maps a hint name to a DISOPTYPE_XXX constant name; the last two hints map
## to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
375
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every exception type gets its own empty list as value.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1', '2', '3', '4', '4UA', '5', '6', '7', '8', '11', '12',
        'E1', 'E1NF',
        'E2',
        'E3', 'E3NF',
        'E4', 'E4NF',
        'E5', 'E5NF',
        'E6', 'E6NF',
        'E7NF',
        'E9', 'E9NF',
        'E10',
        'E11',
        'E12', 'E12NF',
    )
};
409
410
411def _isValidOpcodeByte(sOpcode):
412 """
413 Checks if sOpcode is a valid lower case opcode byte.
414 Returns true/false.
415 """
416 if len(sOpcode) == 4:
417 if sOpcode[:2] == '0x':
418 if sOpcode[2] in '0123456789abcdef':
419 if sOpcode[3] in '0123456789abcdef':
420 return True;
421 return False;
422
423
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ],   ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ],   ##< modrm.reg selects the instruction.
        'mod /r':   [  32, ],   ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ],   ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ],   ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ],   ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as '0xNN'
        strings (None means no lead bytes), sSelector a key of kdSelectors,
        sEncoding a key of kdEncodings and sDisParse an optional IDX_ParseXXX
        name.  The arguments are checked with asserts only.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        # Bits 7:6 hold modrm.mod, so mod == 0y11 means the masked value is 0xc0.
        # (Comparing against 0xc could never match a value masked with 0xc0.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        the map name, each converted as described in the loop below.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                # Two lower case hex digits are kept as a separate '_xx' part.
                sName += '_' + sWord;
            else:
                # Everything else is appended with its first letter upper cased.
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
560
561
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the bare lower case hex digits of the non-negative integer
        iValue, i.e. without the '0x' prefix and without the 'L' suffix that
        Python 2 appends for long integers.
        """
        sHex = hex(iValue);
        if sHex.endswith('L'):
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        return sHex[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base class
        only produces the first form.

        The bytearray holds the value with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() yields arbitrary precision
        # values on both Python 2 and 3, so no 'long' alias is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            # Two's complement: invert every hex digit of (-n - 1).
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits, or else round up to a
        # multiple of the largest normal size.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad with zeros, or with 'f' digits for negative values.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
686
687
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of flag mnemonics (g_kdEFlagsMnemonics).
    """

    ## The mnemonics that stand for a cleared flag.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the encoded mask of flags to set when no "flag clear" mnemonic
        is present, otherwise a pair with the encoded mask of flags to clear
        first and the encoded mask of flags to set second.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            # A leading '!' on the constant name means the flag is to be cleared.
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains at least one "flag clear" mnemonic.

        The value is split on commas and whole mnemonics are compared; a plain
        substring search would for instance find 'pl' inside 'iopl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
723
724
725
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # Signed by default.  This used to rely on the fUnsigned=True default,
        # which made 'int' behave exactly like 'uint'.
        # NOTE(review): confirm no existing test data depends on the old behaviour.
        'int':   TestType('int', fUnsigned = False),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Stores the modifier as given.

        sField must be a key of kdFields and sOp one of kasOperators; all four
        arguments must be strings.  Checked with asserts only.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
858
859
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the selector; the arguments are checked against kdVariables and
        kasCompareOps with asserts only.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
936
937
938class InstructionTest(object):
939 """
940 Instruction test.
941 """
942
943 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
944 self.oInstr = oInstr; # type: InstructionTest
945 self.aoInputs = []; # type: list(TestInOut)
946 self.aoOutputs = []; # type: list(TestInOut)
947 self.aoSelectors = []; # type: list(TestSelector)
948
949 def toString(self, fRepr = False):
950 """
951 Converts it to string representation.
952 """
953 asWords = [];
954 if self.aoSelectors:
955 for oSelector in self.aoSelectors:
956 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
957 asWords.append('/');
958
959 for oModifier in self.aoInputs:
960 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
961
962 asWords.append('->');
963
964 for oModifier in self.aoOutputs:
965 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
966
967 if fRepr:
968 return '<' + ' '.join(asWords) + '>';
969 return ' '.join(asWords);
970
971 def __str__(self):
972 """ Provide string represenation. """
973 return self.toString(False);
974
975 def __repr__(self):
976 """ Provide unambigious string representation. """
977 return self.toString(True);
978
class Operand(object):
    """
    Instruction operand: where it is found and what type it has.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known keys; the offending value is used as
        # the assertion message.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chKind = self.sType[0];
        return chKind == 'E' or chKind == 'G' or chKind == 'M';
993
994
995
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds what the parser has collected about one instruction: the core
    description (mnemonic, operands, opcode, flags, ...), implementation
    details (statistics and function names), where it was decoded from and
    some raw intermediate input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs.  Fields
        whose value is None are left out; the optional ones (operands, maps,
        hints, cpuid, flags like unused/invalid/stub, tag and macro line
        info) are only listed when they are set.  If fRepr is True the
        result is wrapped in angle brackets.
        """
        aasFields = [];

        aasFields.append(['opcode', self.sOpcode]);
        aasFields.append(['mnemonic', self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:     aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding', self.sEncoding]);
        if self.dHints:     aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum', self.sDisEnum]);
        if self.asCpuIds:   aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group', self.sGroup]);
        if self.fUnused:    aasFields.append(['unused', 'True']);
        if self.fInvalid:   aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest', self.asFlTest]);
        aasFields.append(['flmodify', self.asFlModify]);
        aasFields.append(['flundef', self.asFlUndefined]);
        aasFields.append(['flset', self.asFlSet]);
        aasFields.append(['flclear', self.asFlClear]);
        aasFields.append(['mincpu', self.sMinCpu]);
        aasFields.append(['stats', self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:      aasFields.append(['fStub', 'True']);
        if self.fUdStub:    aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:    aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Identity test against None: unset fields are skipped, empty
        # strings and lists are still shown.
        sBody = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sBody + '>';
        return sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are the full hex byte ('0x??', returned as is) and
        the two character '/r' form ('/<digit>', returned as digit << 3).

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  Check the length first so that short strings like ''
        # and '/' end up in the error below rather than as an IndexError.
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag mnemonic is translated via g_kdEFlagsMnemonics into a
        constant name, which is looked up in g_kdX86EFlagsConstants and
        OR'ed into the result.  None or an empty list gives 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);
1160
1161
## All the instructions.
g_aoAllInstructions          = list();  # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat     = dict();  # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each entry is a list since several instructions may share a function.
g_dAllInstructionsByFunction = dict();  # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions     = list();  # type: list(Instruction)
1173
1174## Instruction maps.
1175g_dInstructionMaps = {
1176 'one': InstructionMap('one'),
1177 'grp1_80': InstructionMap('grp1_80', asLeadOpcodes = ['0x80',]),
1178 'grp1_81': InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1179 'grp1_82': InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1180 'grp1_83': InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1181 'grp1a': InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1182 'grp2_c0': InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1183 'grp2_c1': InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1184 'grp2_d0': InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1185 'grp2_d1': InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1186 'grp2_d2': InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1187 'grp2_d3': InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1188 'grp3_f6': InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1189 'grp3_f7': InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1190 'grp4': InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1191 'grp5': InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1192 'grp11_c6_m': InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1193 'grp11_c6_r': InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1194 'grp11_c7_m': InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1195 'grp11_c7_r': InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1196
1197 'two0f': InstructionMap('two0f', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1198 'grp6': InstructionMap('grp6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1199 'grp7_m': InstructionMap('grp7_m', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1200 'grp7_r': InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1201 'grp8': InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1202 'grp9': InstructionMap('grp9', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1203 'grp10': InstructionMap('grp10', asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1204 'grp12': InstructionMap('grp12', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1205 'grp13': InstructionMap('grp13', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1206 'grp14': InstructionMap('grp14', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1207 'grp15': InstructionMap('grp15', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
1208 'grp16': InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1209 'grpA17': InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1210 'grpP': InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1211
1212 'three0f38': InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1213 'three0f3a': InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1214
1215 'vexmap1': InstructionMap('vexmap1', sEncoding = 'vex1'),
1216 'vexgrp12': InstructionMap('vexgrp12', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1217 'vexgrp13': InstructionMap('vexgrp13', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1218 'vexgrp14': InstructionMap('vexgrp14', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1219 'vexgrp15': InstructionMap('vexgrp15', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
1220 'vexgrp17': InstructionMap('vexgrp17', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1221
1222 'vexmap2': InstructionMap('vexmap2', sEncoding = 'vex2'),
1223 'vexmap3': InstructionMap('vexmap3', sEncoding = 'vex3'),
1224
1225 'xopmap8': InstructionMap('xopmap8', sEncoding = 'xop8'),
1226 'xopmap9': InstructionMap('xopmap9', sEncoding = 'xop9'),
1227 'xopgrp1': InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1228 'xopgrp2': InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1229 'xopgrp3': InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1230 'xopmap10': InstructionMap('xopmap10', sEncoding = 'xop10'),
1231 'xopgrp4': InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1232};
1233
1234
1235
class ParserException(Exception):
    """ Parser exception; carries the error message it is created with. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1240
1241
1242class SimpleParser(object):
1243 """
1244 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1245 """
1246
1247 ## @name Parser state.
1248 ## @{
1249 kiCode = 0;
1250 kiCommentMulti = 1;
1251 ## @}
1252
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up a parser for one source file.

    sSrcFile is the file name used in messages, asLines the lines to
    parse, and sDefaultMap the name of the instruction map to use for
    instructions which do not name one; it must be a key of
    g_dInstructionMaps (asserted).
    """
    # Input and parser position/state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The map used when an instruction does not specify any.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Value validation patterns.  Macro, mnemonic and stats names are all
    # plain C identifiers.
    sCIdentifier = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sCIdentifier);
    self.oReMnemonic    = re.compile(sCIdentifier);
    self.oReStatsName   = re.compile(sCIdentifier);
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name to handler method.
    self.dTagHandlers   = {
        '@opbrief':         self.parseTagOpBrief,
        '@opdesc':          self.parseTagOpDesc,
        '@opmnemonic':      self.parseTagOpMnemonic,
        '@op1':             self.parseTagOpOperandN,
        '@op2':             self.parseTagOpOperandN,
        '@op3':             self.parseTagOpOperandN,
        '@op4':             self.parseTagOpOperandN,
        '@oppfx':           self.parseTagOpPfx,
        '@opmaps':          self.parseTagOpMaps,
        '@opcode':          self.parseTagOpcode,
        '@opcodesub':       self.parseTagOpcodeSub,
        '@openc':           self.parseTagOpEnc,
        '@opfltest':        self.parseTagOpEFlags,
        '@opflmodify':      self.parseTagOpEFlags,
        '@opflundef':       self.parseTagOpEFlags,
        '@opflset':         self.parseTagOpEFlags,
        '@opflclear':       self.parseTagOpEFlags,
        '@ophints':         self.parseTagOpHints,
        '@opdisenum':       self.parseTagOpDisEnum,
        '@opmincpu':        self.parseTagOpMinCpu,
        '@opcpuid':         self.parseTagOpCpuId,
        '@opgroup':         self.parseTagOpGroup,
        '@opunused':        self.parseTagOpUnusedInvalid,
        '@opinvalid':       self.parseTagOpUnusedInvalid,
        '@opinvlstyle':     self.parseTagOpUnusedInvalid,
        '@optest':          self.parseTagOpTest,
        '@optestign':       self.parseTagOpTestIgnore,
        '@optestignore':    self.parseTagOpTestIgnore,
        '@opcopytests':     self.parseTagOpCopyTests,
        '@oponlytest':      self.parseTagOpOnlyTest,
        '@opxcpttype':      self.parseTagOpXcptType,
        '@opstats':         self.parseTagOpStats,
        '@opfunction':      self.parseTagOpFunction,
        '@opdone':          self.parseTagOpDone,
    };

    # Error messages collected by error() and errorComment().
    self.asErrors       = [];
1315
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file
    and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1321
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1327
def error(self, sMessage):
    """
    Records an error for the current line in asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1335
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment in asErrors;
    the reported line number is self.iCommentLine + iLineInComment.
    Returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
1343
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr.
    Returns number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1352
def debug(self, sMessage):
    """
    For debugging: prints sMessage with a 'debug: ' prefix when fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1359
1360
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global list and the
    list of current instructions.

    The instruction is created for iLine, or for the current line when
    iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1369
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise
    the first '_' separated word, lowercased, becomes the mnemonic and,
    unless operands are already present, each remaining word is added as
    an operand type.

    Returns False if a word is not a known operand type (operands added
    before it are kept), True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Unknown operand type; fail without reporting an error.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1385
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in whatever can be derived
    from what was specified (mnemonic, disassembler enum, stats name,
    function name, encoding), applies the default map and registers the
    instruction with its maps and the global stat/function dictionaries.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Tagged (cOpTags > 0) and untagged legacy instructions get no special
    # treatment; the defaults below are applied to both.

    def operandSuffix():
        """ Returns the '_<type>' suffixes of all operands having a type. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    #
    # Common defaults.  The order matters: later steps use earlier results.
    #

    # Guess mnemonic and operands from the stats name, or failing that the
    # function name, if the mnemonic is missing.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            sStatsLike = oInstr.sStats;
        elif oInstr.sFunction is not None:
            sStatsLike = oInstr.sFunction.replace('iemOp_', '');
        else:
            sStatsLike = None;
        if sStatsLike is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sStatsLike);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or else
    # from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandSuffix();

    # Derive the IEM function name from mnemonic and operand types, or else
    # from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.  It stays None when the first operand
    # does not use ModR/M.
    if oInstr.sEncoding is None:
        if oInstr.aoOperands:
            if oInstr.aoOperands[0].usesModRM():
                fVex = len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv';
                oInstr.sEncoding = 'ModR/M+VEX' if fVex else 'ModR/M';
        elif oInstr.fUnused and oInstr.sSubOpcode:
            oInstr.sEncoding = 'ModR/M';
        else:
            oInstr.sEncoding = 'fixed';

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # Duplicates are reported and the first instruction is kept.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
1488
def doneInstructions(self, iLineInComment = None):
    """
    Done with the current instruction(s).

    Completes each current instruction via doneInstructionOne, using the
    current line, or self.iCommentLine + iLineInComment when given, as the
    completion line.  Updates the stub and instruction totals and resets
    the comment text and the current instruction list.

    Returns True.
    """
    aoDone = self.aoCurInstrs;
    for oInstr in aoDone:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(aoDone);

    self.sComment     = '';
    self.aoCurInstrs  = [];
    return True;
1503
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None
    are set; any other existing value (empty strings included) is kept.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1515
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, padding the array with None as needed.

    Unless fOverwrite is True, only entries that are currently None are set.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1527
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line starts with the word 'Opcode' followed by a word
    beginning with '0x' or '0X', the words after 'Opcode' are stored as a
    space separated string in the sRawOldOpcodes attribute of the current
    instructions (without overwriting existing values).  An uppercase
    '0X' prefix is normalized to '0x'.

    Returns False, whether or not anything was stored.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Skip the 'Opcode' word and keep the opcode bytes and what follows.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1543
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (at self.iCommentLine + iTagLine) if there are
    no current ones, then counts the tag on every current instruction;
    an instruction getting its first tag also bumps cTotalTagged.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1553
1554 @staticmethod
1555 def flattenSections(aasSections):
1556 """
1557 Flattens multiline sections into stripped single strings.
1558 Returns list of strings, on section per string.
1559 """
1560 asRet = [];
1561 for asLines in aasSections:
1562 if asLines:
1563 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1564 return asRet;
1565
1566 @staticmethod
1567 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1568 """
1569 Flattens sections into a simple stripped string with newlines as
1570 section breaks. The final section does not sport a trailing newline.
1571 """
1572 # Typical: One section with a single line.
1573 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1574 return aasSections[0][0].strip();
1575
1576 sRet = '';
1577 for iSection, asLines in enumerate(aasSections):
1578 if asLines:
1579 if iSection > 0:
1580 sRet += sSectionSep;
1581 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1582 return sRet;
1583
1584
1585
1586 ## @name Tag parsers
1587 ## @{
1588
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    missing final period is added.  Setting the brief twice is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first period that ends a sentence, i.e. one followed by a
    # space or ending the string.  It must be the final character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1618
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to the description sections of the instruction.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asNewSections = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asNewSections);

    _ = sTag; _ = iEndLine;
    return True;
1633
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Setting the mnemonic twice is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic):
        sOldMnemonic = oInstr.sMnemonic;
        if sOldMnemonic is None:
            oInstr.sMnemonic = sNewMnemonic;
            _ = iEndLine;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));
    return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));
1656
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See g_kdOpLocations for a list.  When
    omitted, the default location given by the g_kdOpTypes entry of the
    type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See g_kdOpTypes for a list.

    The operand index comes from the final digit of the tag.  Specifying
    the same operand twice is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Operands
    # not yet specified are represented by None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1706
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are separated by commas (or line/section breaks).  Blanks
    around the names and empty entries, e.g. from a trailing comma, are
    ignored.  Each name must be a key of g_dInstructionMaps and must not
    already be assigned to the instruction.  A map repeated within the
    value itself is reported as an error, but processing continues.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so blank entries are weeded out here
    # for the 'value required' check to work.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',') if sMap.strip()];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1741
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive.  'n/a' is stored as None and 'none' as
    the string 'none'.  A two character value gets '0x' prepended before
    being checked as an opcode byte.  Anything but 'n/a' must be a key of
    g_kdPrefixes.  Setting a prefix when one is already set is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1779
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value must either be a key of g_kdSpecialOpcodes or pass
    _isValidOpcodeByte().  Setting the opcode twice is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewOpcode = self.flattenAllSections(aasSections);
    if sNewOpcode not in g_kdSpecialOpcodes and not _isValidOpcodeByte(sNewOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sNewOpcode,));

    # Set it, refusing to overwrite.
    sOldOpcode = oInstr.sOpcode;
    if sOldOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldOpcode, sNewOpcode,));
    oInstr.sOpcode = sNewOpcode;

    _ = iEndLine;
    return True;
1803
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the
    first element of the table entry.  Setting it twice is an error.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, then translate it via the table.
    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawSubOpcode,));
    sNewSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    # Set it, refusing to overwrite.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;

    _ = iEndLine;
    return True;
1829
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@openc
    Value:      ModR/M|fixed|prefix|<map name>

    Records the operand encoding style of the instruction.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Accepted values: a known encoding, the name of an instruction map, or
    # anything passing the opcode byte check.
    sValue = self.flattenAllSections(aasSections);
    fAccepted = sValue in g_kdEncodings \
             or sValue in g_dInstructionMaps \
             or _isValidOpcodeByte(sValue);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sValue,));

    # Only the first @openc of an instruction is accepted.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sValue;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                             % ( sTag, oInstr.sEncoding, sValue,));
1856
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags looks the tag up here and then reads/writes the named
## attribute on the instruction via getattr() and setattr().
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
1865
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:      <eflags specifier>

    Stores the comma separated list of EFLAGS mnemonics in the instruction
    attribute that kdOpFlagToAttr associates with the tag.  The single value
    'none' (any case) yields an empty list.

    Returns True on success, False if any flag is invalid, otherwise the
    result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Turn lines and sections into one comma separated list and split it.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag, sRaw in enumerate(asFlags):
            if sRaw in g_kdEFlagsMnemonics:
                continue;
            # Tolerate blanks around the commas by retrying the stripped form.
            sStripped = sRaw.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRaw,));
        if not fOkay:
            return False;

    # Each of the flag tags may only be given once per instruction.
    sAttr = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttr);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttr, asFlags);
    return True;
1897
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Every valid hint is added as a key to oInstr.dHints.  The single value
    'none' (any case) means no hints.  Duplicates are reported via
    errorComment() but do not fail the tag.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                # split() has already removed surrounding whitespace, so this
                # retry is purely defensive.  It must write back into the
                # list; the string itself cannot be assigned to by index.
                if sHint.strip() in g_kdHints:
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1931
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    When more than one word is given an error is reported and the first word
    is used.  Returns True on success, False when no value is given,
    otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected; nothing at all is fatal, extra words
    # are only complained about.
    asTokens = self.flattenAllSections(aasSections).split();
    if not asTokens:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,));
        return False;
    if len(asTokens) > 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,));

    sEnumName = asTokens[0];
    if not self.oReDisEnum.match(sEnumName):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sEnumName, self.oReDisEnum.pattern));

    # Only the first @opdisenum of an instruction is accepted.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sEnumName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sEnumName,));
1960
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    When more than one word is given an error is reported and the first word
    is used.  A second \@opmincpu naming a different CPU is reported and the
    first value is kept.

    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Nothing to index below; fail cleanly rather than with IndexError.
        return self.errorComment(iTagLine, '%s: exactly one CPU name, please (no value given)' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The value must not be stored before this check, otherwise a
    # conflicting earlier value would be overwritten unnoticed.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
1990
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    Every valid flag is appended to oInstr.asCpuIds.  The single value
    'none' (any case) means no flags.  Duplicates are reported via
    errorComment() but do not fail the tag.

    Returns True on success, False if any flag is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                # split() has already removed surrounding whitespace, so this
                # retry is purely defensive.  It must write back into the
                # list; the string itself cannot be assigned to by index.
                if sCpuId.strip() in g_kdCpuIdFlags:
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2024
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Records the instruction group.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word, and it has to match the group name pattern.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sName,) = asGroups;
    if not self.oReGroupName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sName, self.oReGroupName.pattern));

    # Only the first @opgroup of an instruction is accepted.
    if oInstr.sGroup is None:
        oInstr.sGroup = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sName,));
2050
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    \@opunused marks the specification as being for a currently unused
    instruction encoding.

    \@opinvalid marks the specification as being for a currently invalid
    instruction encoding (like UD2).

    \@opinvlstyle only states how CPUs decode the instruction when it is not
    supported (\@opcpuid, \@opmincpu) or disabled.

    All three store the style in oInstr.sInvalidStyle, so only one of them
    may be used per instruction.  Returns True on success, otherwise the
    result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word, and it has to be a known style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the three tags additionally raise a flag on the instruction.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);
    return True;
2088
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  A test that parses
    cleanly is appended to oInstr.aoTests; problems are reported through
    errorComment() and the test is dropped.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # Walk the words back to front: outputs come last, then '->', then
        # the inputs, then '/' and finally the selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  The tests below must compare list
            # identity; comparing by value would treat two different but
            # equal lists (e.g. both still empty) as the same one and miss
            # a repeated or misplaced separator.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # A selector may be a predicate shorthand that expands to a full
            # condition expression.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # The first operator found decides; don't try the rest.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                         if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    # The first operator found decides; don't try the rest.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test only if everything parsed; otherwise dump what we
        # understood to help locating the problem.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2233
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    Accepts and discards the tag.  Handy for switching a test off while
    debugging another one.

    See also \@oponlytest.
    """
    # Nothing to do; just reference the arguments so none is flagged unused.
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
2245
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Lets several encodings of the same operation share one set of tests
    instead of duplicating them.

    The references are only queued on oInstr.asCopyTests here; they get
    resolved after all input has been parsed so that forward references
    work.  Invalid and duplicate references are reported and skipped.

    Returns True, or the result of errorComment() if no reference is given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asReferences:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        # A reference is either a statistics name or a function name.
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                          % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2274
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest
    Value:      none

    Restricts testing to the instructions carrying this tag, which is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag is a pure marker, any text after it is a mistake.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction once in the global only-test list.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
2297
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word, and it has to be a known exception type.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));
    (sXcptType,) = asWords;
    if sXcptType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sXcptType, sorted(g_kdXcptTypes.keys()),));

    # Only the first @opxcpttype of an instruction is accepted.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sXcptType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sXcptType,));
2324
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or
    generated from the mnemonic and operands.

    Thought to be needed when specifying instructions whose implementation
    does not follow immediately or does not exist yet.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value must match the function name pattern.
    sName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReFunctionName.pattern));

    # Only the first @opfunction of an instruction is accepted.
    if oInstr.sFunction is None:
        oInstr.sFunction = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                             % (sTag, oInstr.sFunction, sName,));
2352
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or generated from the mnemonic
    and operands.

    Thought to be needed when specifying instructions whose implementation
    does not follow immediately or does not exist yet.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value must match the statistics name pattern.
    sName = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sName):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReStatsName.pattern));

    # Only the first @opstats of an instruction is accepted.
    if oInstr.sStats is None:
        oInstr.sStats = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                             % (sTag, oInstr.sStats, sName,));
2380
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions specified so far.

    Returns the result of doneInstructions(), or the result of
    errorComment() if the tag is followed by any text.
    """
    _ = iEndLine;
    sUnexpected = self.flattenAllSections(aasSections);
    if sUnexpected == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sUnexpected,));
2393
2394 ## @}
2395
2396
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away.
    Otherwise the comment is split into lines, an old style 'Opcode ...'
    first line is passed to parseCommentOldOpcode(), and the text is divided
    into tags.  Text before the first tag is collected under the pseudo tag
    '@default'.  Each tag is then dispatched to its handler in
    self.dTagHandlers; unknown and apparently mistyped tags are reported via
    errorComment().

    Returns False if the comment was rejected, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while asLines and not asLines[0]:
        self.iCommentLine += 1; # keeps the start line in step with asLines[0].
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Split the lines up into @op* tagged data.  Each entry is a tuple:
    #   (sTag, aasSections, iTagLine, iEndLine)
    # A blank line starts a new section within the current tag.
    #
    atTags       = [];
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            # A new tag closes the previous one.
            atTags.append((sCurTag, aasSections, iCurTagLine, iLine,));
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;
    # The final tag ends on the last line of the comment.
    atTags.append((sCurTag, aasSections, iCurTagLine, len(asLines) - 1,));

    #
    # Process the tags in order.  All of them, the last one included, go
    # through the same checks so that a mistyped final tag gets diagnosed
    # just like any other.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    for sTag, aasTagSections, iTagLine, iEndLine in atTags:
        # Drop the empty section that a trailing blank line leaves behind.
        if len(aasTagSections) > 1 and not aasTagSections[-1]:
            aasTagSections.pop(-1);
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            cOpTags += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            sFlatDefault = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2497
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the closing parenthesis is
    not found in it, following lines are taken from self.asLines, starting
    at index self.iLine, and appended until it is.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    On an invalid macro name or when running out of lines, the result of
    self.error() is returned instead.  A missing open parenthesis goes
    through self.raiseError().
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;               # Parenthesis nesting depth, 1 = argument level.
    off      = offOpen + 1;
    offStart = off;             # Start of the argument currently being scanned.
    while cDepth > 0:
        # Pull in more lines until there is a character at 'off'.  This has
        # to loop since an appended line may be empty.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch in (',', ')'):
            # Only separators at the outermost level delimit arguments;
            # those inside nested parentheses belong to the argument text.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2539
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (blanks allowed) by an opening
    parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found, with the offset adjusted to be relative to sCode.
    """
    offHit = sCode.find(sMacro);
    # Use startswith() rather than indexing so that a macro name with nothing
    # behind it on the line counts as "not found" instead of raising.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        # NOTE(review): parseMacroInvocation can also return the result of
        # self.error(); if that is not a 2-tuple the unpacking here fails.
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2549
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2555
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so names after the first hit are never tried.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oCandidates if asArgs is not None), None);
2565
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked for consistency and then merged into
    the most recently added instruction (self.aoCurInstrs[-1]); one is added
    first if there is none.  Values already set on the instruction are
    compared against the macro and mismatches reported through self.error().

    sMacro is only used in messages and sAsm is not used.  Always returns
    True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                # The form may list more operand locations than the
                # instruction has operands (e.g. wrong a_Form for the macro
                # variant); report that rather than indexing out of range.
                if iOperand >= len(oInstr.aoOperands):
                    self.error('%s: a_Form %s expects %u operand(s), but the instruction only has %u'
                               % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands),));
                    break;
                if oInstr.aoOperands[iOperand].sWhere != sWhere:
                    self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                               % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both lists are '|' separated; a lone '0' means no hints.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
2679
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Handles the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3 and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are synthesized from the lower case mnemonic and the operands
    before forwarding to workerIemOpMnemonicEx.

    Returns whatever workerIemOpMnemonicEx returns.
    """
    # Without operands the bare mnemonic serves as both names.
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);   # e.g. add_Gv_Ev
        sAsm   = sLower + ' ' + ','.join(asOperands);   # e.g. add Gv,Ev
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2689
def checkCodeForMacro(self, sCode):
    """
    Inspects a piece of code for the macro invocations the parser cares about.

    Returns True when a FNIEMOP_XXX function definition/stub macro was
    found and processed, False in every other case (including when one of
    the IEMOP_MNEMONIC* macros was processed).
    """
    # A macro invocation needs a '(' somewhere after the first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sMacro    = asArgs[0];
        sFunction = asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub = sMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # A stub has no body to scan, so the instruction(s) are complete.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        # NOTE(review): the nesting of this call could not be recovered from
        # the flattened listing; it is placed at the single-instruction level
        # rather than under the sStats check - confirm against the repository.
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<n>,]
    #                     a_fDisHints, a_fIemHints)
    # The <n> operands sit between a_Lower (index 5) and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1],
                                       [asArgs[6 + iOp] for iOp in range(cOperands)]);

    #
    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<n>,] a_fDisHints, a_fIemHints)
    # The <n> operands sit between a_Lower (index 3) and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1],
                                     [asArgs[4 + iOp] for iOp in range(cOperands)]);

    return False;
2783
2784
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between the code
    and multi-line comment states.  Code fragments are handed to
    checkCodeForMacro, completed comments to parseComment.

    Returns number of errors (the printErrors result).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        # Code up to the comment start, then enter comment mode.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # Comment complete: switch back to code and process it.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    # Raised explicitly rather than via 'assert' so that running
                    # with -O cannot turn an unknown state into an endless loop.
                    raise AssertionError('unexpected parser state: %s' % (self.iState,));

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith instead of sLine[0] so an empty line entry is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
2848
2849
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count produced by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A
    ParserException is printed and then re-raised; any other exception from
    the parser propagates untouched.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # closes the file on both the success and the failure path.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
2880
2881
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each entry in an instruction's asCopyTests is looked up first in
    g_dAllInstructionsByStat and then in g_dAllInstructionsByFunction; the
    tests of every matching instruction are appended to the requesting
    instruction's aoTests.  References that cannot be resolved, or that
    resolve to the requesting instruction itself, are reported on stderr.

    Returns the number of errors reported.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name yields a single instruction, a function name a list.
            oByStats    = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStats,] if oByStats else g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
2909
2910
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is emptied on every other
    instruction that has tests.  When it is empty nothing is changed.

    Always returns 0 (there are no error conditions).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
2922
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files located next to this script,
    then applies the test copying and only-test post-processing steps.

    Returns True on success.  If any errors were counted the process is
    terminated via sys.exit(1); exceptions from the file parser propagate.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default instruction map, file name) pairs, parsed in this order.
    aasInputs = (
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    );

    cErrors  = sum(__parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap) for sDefaultMap, sName in aasInputs);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        sys.exit(1);
    return True;
2943
2944
2945
# Runs unconditionally at module level, i.e. on import as well as when the file
# is executed as a script: parses the instruction files and, as implemented by
# __parseAll(), ends the process via sys.exit(1) if any errors were counted.
__parseAll();
2947
2948
2949#
2950# Generators (may perhaps move later).
2951#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Emits a 'const DISOPCODE <table>[]' definition, with one OP()/OPVEX()
    entry per instruction, followed by an AssertCompile on the table size.
    The text is written to oDstFile.  Only the first instruction map in
    sort order (encoding + lead opcodes) is processed for now.
    """
    # Character offsets at which the successive columns of a table entry start.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

    # Decoder column for the VEX prefix encodings.
    dVexParsers = { 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', };

    aoSortedMaps = sorted(g_dInstructionMaps.items(),
                          key = lambda oPair: oPair[1].sEncoding + ''.join(oPair[1].asLeadOpcodes));
    for sName, oMap in aoSortedMaps:
        assert oMap.sName == sName;

        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):
            # Every 16th entry starts a new group: blank separator (except
            # before the first group) and a header with the high nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Empty slots produce no output at all.
            if oInstr is None:
                continue;
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            aoOperands = oInstr.aoOperands;
            if len(aoOperands) > 3:
                sMacro, cMaxOperands = 'OPVEX', 4;
            else:
                sMacro, cMaxOperands = 'OP', 3;
            assert len(aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            asOpFormats = [];
            for oOperand in aoOperands:
                sOpFormat = g_kdOpTypes[oOperand.sType][2];
                if sOpFormat[0] != '%':             ## @todo remove upper() later.
                    sOpFormat = sOpFormat.upper();  ## @todo remove upper() later.
                asOpFormats.append(sOpFormat);
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asOpFormats:
                sFormat += ' ' + ','.join(asOpFormats);
            asColumns = [ sFormat + '",', ];

            #
            # Decoders.
            #
            cLeadColumns = len(asColumns);
            sEncoding    = oInstr.sEncoding;
            if sEncoding is None or sEncoding == 'fixed':
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(aoOperands) > 1 and aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,');
            elif sEncoding == 'prefix':
                asColumns.extend(['0,'] * len(aoOperands));
            elif sEncoding in dVexParsers:
                asColumns.append(dVexParsers[sEncoding]);
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo Not handled yet: IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2,
                #        IDX_ParseGrp3..10, IDX_ParseGrp12..16, IDX_Parse3DNow, IDX_ParseThreeByteEsc4,
                #        IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause,
                #        IDX_ParseInvOpModRM.
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands.
            for oRemaining in aoOperands[len(asColumns) - cLeadColumns:]:
                sIdx = g_kdOpTypes[oRemaining.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - cLeadColumns)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            asColumns.extend(['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands]);
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(aoOperands)));

            #
            # Flags: the DISOPTYPE_ defines of the hints, in sorted hint order.
            #
            asDefines  = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
            asDisTypes = [sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')];
            asColumns.append((' | '.join(asDisTypes) or '0') + '),');

            #
            # Format the columns into a line: pad up to the column offset, but
            # always put at least one space between two columns.
            #
            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            # DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                cchPadding = max(aoffColumns[iColumn] - len(sLine), 1);
                sLine += ' ' * cchPadding + sColumn;
            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3115
# Script entry point: when executed directly (rather than imported), write the
# disassembler tables to the default destination, which is standard output.
if __name__ == '__main__':
    generateDisassemblerTables();
3118
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette