VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66391

Last change on this file since 66391 was 66391, checked in by vboxsync, 8 years ago

IEM: Implemented stmxcsr Md (0f ae !11/3).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 134.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66391 2017-04-02 14:56:59Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66391 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias it to int; code further down
# in this file (e.g. TestType.get) calls long() for string conversion.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their 32-bit values.
## Single-bit flags are spelled as shifts, mirroring the RT_BIT_32(n) notation.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          1 << 0,
    'X86_EFL_1':           1 << 1,
    'X86_EFL_PF':          1 << 2,
    'X86_EFL_AF':          1 << 4,
    'X86_EFL_ZF':          1 << 6,
    'X86_EFL_SF':          1 << 7,
    'X86_EFL_TF':          1 << 8,
    'X86_EFL_IF':          1 << 9,
    'X86_EFL_DF':          1 << 10,
    'X86_EFL_OF':          1 << 11,
    'X86_EFL_IOPL':        (1 << 12) | (1 << 13),   # two-bit field
    'X86_EFL_NT':          1 << 14,
    'X86_EFL_RF':          1 << 16,
    'X86_EFL_VM':          1 << 17,
    'X86_EFL_AC':          1 << 18,
    'X86_EFL_VIF':         1 << 19,
    'X86_EFL_VIP':         1 << 20,
    'X86_EFL_ID':          1 << 21,
    'X86_EFL_LIVE_MASK':   0x003f7fd5,              # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant (a key of
## g_kdX86EFlagsConstants).  A leading '!' marks mnemonics that denote the
## flag being clear; TestTypeEflags.get puts those into the clear mask.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',       ##< Carry Flag.
    'nc':   '!X86_EFL_CF',      ##< No Carry.

    # NOTE(review): common debugger notation shows 'pe' when PF is set and
    # 'po' when it is clear, which is the opposite of the two entries below.
    # Confirm against the existing test annotations before changing.
    'po':   'X86_EFL_PF',       ##< Parity Odd.
    'pe':   '!X86_EFL_PF',      ##< Parity Even.

    'af':   'X86_EFL_AF',       ##< Aux Flag.
    'na':   '!X86_EFL_AF',      ##< No Aux.

    'zr':   'X86_EFL_ZF',       ##< ZeRo.
    'nz':   '!X86_EFL_ZF',      ##< No Zero.

    'ng':   'X86_EFL_SF',       ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',      ##< PLus (sign).

    'tf':   'X86_EFL_TF',       ##< Trap flag.

    'ei':   'X86_EFL_IF',       ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',      ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',       ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',      ##< UP (string op direction).

    'ov':   'X86_EFL_OF',       ##< OVerflow.
    'nv':   '!X86_EFL_OF',      ##< No Overflow.

    'nt':   'X86_EFL_NT',       ##< Nested Task.
    'rf':   'X86_EFL_RF',       ##< Resume Flag.
    'vm':   'X86_EFL_VM',       ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',       ##< Alignment Check.
    'vif':  'X86_EFL_VIF',      ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',      ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};

## \@op[1-4] locations
##
## Only the keys matter in the code visible here (g_kdOpTypes and g_kdIemForms
## refer to them); every value is an empty list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
145
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':       ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':       ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':       ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':       ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':      ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':      ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':      ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':      ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':      ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'WqZxReg':  ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':       ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':     ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),

    # ModR/M.rm - memory only.
    'Ma':       ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'MbRO':     ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'MdWO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':       ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),

    # ModR/M.reg
    'Gb':       ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':       ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':       ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':      ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':      ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':      ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':      ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':       ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    # Uses its own format string / parameter name like every other entry
    # (previously a copy of the Vpd values).
    'Vdq':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),

    # Immediate values.
    'Ib':       ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':       ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':       ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':       ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':       ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':       ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':       ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':       ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':       ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':       ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':       ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':       ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':       ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':       ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':       ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':       ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':      ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':       ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':       ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':       ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':       ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':       ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':       ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
226
227# IDX_ParseFixedReg
228# IDX_ParseVexDest
229
230
## IEMFORM_XXX mappings.
##
## Each value is ( encoding name, list of operand locations in operand order ).
## The location names are keys of g_kdOpLocations; 'FIXED' has no list (None).
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':   ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':   ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':        ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':    ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':    ( 'ModR/M',     [ 'rm', ], ),
    'R':        ( 'ModR/M',     [ 'reg', ], ),
    # NOTE(review): 'ModR/M+VEX' is not a key of g_kdEncodings below - confirm
    # whether that table is expected to list it.
    'RVM':      ( 'ModR/M+VEX', [ 'reg', 'vvvv', 'rm'], ),
    'MVR':      ( 'ModR/M+VEX', [ 'rm', 'vvvv', 'reg'], ),
    'FIXED':    ( 'fixed',      None, )
};

## \@oppfx values.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};

## Special \@opcode tag values.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};

## Special \@opcodesub tag values.
## The short forms '11' and '!11' map to the same value as the long forms.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};

## Valid values for \@openc
## The first list element is a BS3CG1ENC_XXX constant name, or None.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};

## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};

## Known CPU names; only the keys carry information, the values are empty tuples.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
298
## \@opcpuid
##
## Maps the feature name used in the \@opcpuid tag to the name of the
## corresponding X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the constants of neighbouring CPUID bits
    # (DTES64, CPLDS and TM2); each feature now maps to its own bit.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
343
## \@ophints values.
##
## Maps the hint name to a DISOPTYPE_XXX constant name; the last two hints
## map to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};

## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys carry information; every value is an empty list.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
412
413
414def _isValidOpcodeByte(sOpcode):
415 """
416 Checks if sOpcode is a valid lower case opcode byte.
417 Returns true/false.
418 """
419 if len(sOpcode) == 4:
420 if sOpcode[:2] == '0x':
421 if sOpcode[2] in '0123456789abcdef':
422 if sOpcode[3] in '0123456789abcdef':
423 return True;
424 return False;
425
426
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values; only the keys are used by this class.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [  256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [    8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name (getDisasTableName derives the table name from
        it), asLeadOpcodes the lead opcode bytes as '0xNN' strings (None means
        no lead bytes), sSelector a kdSelectors key, sEncoding a kdEncodings
        key and sDisParse an optional 'IDX_Parse...' handler name.

        Invalid arguments trip an assertion.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the selectors that fix modrm.mod the two
        top bits of the byte are checked by assertion.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # modrm.mod occupies bits 7:6, so mod == 0y11 means the masked value
        # equals 0xc0.  (Comparing against 0xc could never match.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' and two digit lower case hex words are appended with a
        leading underscore, other words are appended with the first letter
        upper cased (after turning 'grp' into 'Grp' and 'map' into 'Map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
567
568
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the ascending list of natural value
        sizes in bytes (defaults to 1, 2, 4, 8, 16 and 32) and fUnsigned
        whether values are zero extended unless they carry a sign prefix.
        """
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _hexDigits(iValue):
        """
        Returns the lower case hex digits of the non-negative integer iValue,
        without the '0x' prefix and without the 'L' suffix Python 2 puts on
        long integers.
        """
        sHex = hex(iValue);
        assert sHex[:2] == '0x';
        if sHex[-1] == 'L':
            sHex = sHex[:-1];
        return sHex[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class only returns the first form.  The bytearray holds the value
        with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer (int() yields arbitrary precision on
        # both Python 2 and 3).
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string.  Negative values need to be manually
        # converted to two's complement (~(-n - 1)) by inverting each digit.
        if iValue >= 0:
            sHex = self._hexDigits(iValue);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._hexDigits(-iValue - 1)]);
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest natural size that fits, or round up to a multiple
        # of the largest size when the value is bigger than that.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad with zeros or, for negative values, with 'f' digits.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
693
694
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that denote a cleared flag (the '!' entries of g_kdEFlagsMnemonics).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns a single (fSignExtend, bytearray) entry holding the flags to
        set when no mnemonic denotes a cleared flag, otherwise a pair where
        the first entry holds the flags to clear and the second the flags to
        set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue yields an AND+OR pair, i.e. whether any of its comma
        separated flags is one of the kdZeroValueFlags mnemonics.

        Whole flag names are compared; a plain substring search would for
        instance find 'pl' inside 'iopl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
730
731
732
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created with the default fUnsigned=True, which
    ## makes it behave exactly like 'uint' - confirm whether it was meant to
    ## be signed.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # was 'xmm15_dw0', breaking the xmmN.dw0 naming.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField is a kdFields key, sOp one of kasOperators, sValue the value
        string and sType the type name.  All four must be strings; invalid
        arguments trip an assertion.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
992
class TestSelector(object):
    """
    A single selector restricting when an instruction test applies.

    A selector is a (variable, compare operator, value) triplet.  The valid
    variables and values are listed in kdVariables, the valid operators in
    kasCompareOps, and kdPredicates maps shorthand names to full
    'variable==value' expressions.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # CPU modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # Paging on/off.
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };

    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector; the arguments are validated by assertions against
        kdVariables and kasCompareOps.
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        (self.sVariable, self.sOp, self.sValue) = (sVariable, sOp, sValue);
1069
1070
1071class InstructionTest(object):
1072 """
1073 Instruction test.
1074 """
1075
1076 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1077 self.oInstr = oInstr; # type: InstructionTest
1078 self.aoInputs = []; # type: list(TestInOut)
1079 self.aoOutputs = []; # type: list(TestInOut)
1080 self.aoSelectors = []; # type: list(TestSelector)
1081
1082 def toString(self, fRepr = False):
1083 """
1084 Converts it to string representation.
1085 """
1086 asWords = [];
1087 if self.aoSelectors:
1088 for oSelector in self.aoSelectors:
1089 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1090 asWords.append('/');
1091
1092 for oModifier in self.aoInputs:
1093 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1094
1095 asWords.append('->');
1096
1097 for oModifier in self.aoOutputs:
1098 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1099
1100 if fRepr:
1101 return '<' + ' '.join(asWords) + '>';
1102 return ' '.join(asWords);
1103
1104 def __str__(self):
1105 """ Provide string represenation. """
1106 return self.toString(False);
1107
1108 def __repr__(self):
1109 """ Provide unambigious string representation. """
1110 return self.toString(True);
1111
class Operand(object):
    """
    An instruction operand: where it is encoded and what type it has.
    """

    def __init__(self, sWhere, sType):
        """
        sWhere must be a key of g_kdOpLocations and sType a key of
        g_kdOpTypes (both enforced by assertion only).
        """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType in g_kdOpTypes, sType;
        (self.sWhere, self.sType) = (sWhere, sType);    ##< g_kdOpLocations, g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the type starts with E, G or M, i.e. uses some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1126
1127
1128
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    One instruction and everything the parser has collected about it.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction first seen at line iLine of sSrcFile.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turns the object into a '; ' separated list of 'field=value' pairs.

        Fields whose value is None are left out.  When fRepr is True the
        result is wrapped in '<' and '>'.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # More than one char means at least one pair has been added
                # already (a lone '<' does not count), so separate them.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are '0xNN', '/r', '11/r' and '!11/r', where r is a
        single digit.  Raises Exception if sOpcode is None or has any other
        form.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  It is two characters long ('/' plus one digit); a
        # length check of 4 here made this branch unreachable.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        which is then looked up in g_kdX86EFlagsConstants.  None or an empty
        list yields 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);
1302
1303
## All the instructions, in the order they were added.
g_aoAllInstructions          = [];  # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat     = {};  # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each value is a list as several instructions may share one function.
g_dAllInstructionsByFunction = {};  # type: dict(list(Instruction))

## Instructions tagged by oponlytest.
g_aoOnlyTestInstructions     = [];  # type: list(Instruction)
1316## Instruction maps.
## Instruction maps, indexed by map name.
g_dInstructionMaps = {
    # One byte opcode map and its groups.
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',]),
    'grp1_81':      InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    # Two byte (0x0f) opcode map and its groups.
    'two0f':        InstructionMap('two0f',      asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',       asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',       asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',       asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',      asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',      asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',      asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',      asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',      asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',      asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',     asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',       asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # Three byte opcode maps.
    'three0f38':    InstructionMap('three0f38',  asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a',  asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX maps and groups.
    'vexmap1':      InstructionMap('vexmap1',    sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',   sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',   sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',   sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',   sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',   sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',    sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',    sEncoding = 'vex3'),

    # XOP maps and groups.
    'xopmap8':      InstructionMap('xopmap8',    sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',    sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',   sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1375
1376
1377
class ParserException(Exception):
    """ Exception raised by the parser; carries a single message string. """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1382
1383
1384class SimpleParser(object):
1385 """
1386 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1387 """
1388
1389 ## @name Parser state.
1390 ## @{
1391 kiCode = 0;
1392 kiCommentMulti = 1;
1393 ## @}
1394
1395 def __init__(self, sSrcFile, asLines, sDefaultMap):
1396 self.sSrcFile = sSrcFile;
1397 self.asLines = asLines;
1398 self.iLine = 0;
1399 self.iState = self.kiCode;
1400 self.sComment = '';
1401 self.iCommentLine = 0;
1402 self.aoCurInstrs = [];
1403
1404 assert sDefaultMap in g_dInstructionMaps;
1405 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1406
1407 self.cTotalInstr = 0;
1408 self.cTotalStubs = 0;
1409 self.cTotalTagged = 0;
1410
1411 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1412 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1413 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1414 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1415 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1416 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1417 self.fDebug = True;
1418
1419 self.dTagHandlers = {
1420 '@opbrief': self.parseTagOpBrief,
1421 '@opdesc': self.parseTagOpDesc,
1422 '@opmnemonic': self.parseTagOpMnemonic,
1423 '@op1': self.parseTagOpOperandN,
1424 '@op2': self.parseTagOpOperandN,
1425 '@op3': self.parseTagOpOperandN,
1426 '@op4': self.parseTagOpOperandN,
1427 '@oppfx': self.parseTagOpPfx,
1428 '@opmaps': self.parseTagOpMaps,
1429 '@opcode': self.parseTagOpcode,
1430 '@opcodesub': self.parseTagOpcodeSub,
1431 '@openc': self.parseTagOpEnc,
1432 '@opfltest': self.parseTagOpEFlags,
1433 '@opflmodify': self.parseTagOpEFlags,
1434 '@opflundef': self.parseTagOpEFlags,
1435 '@opflset': self.parseTagOpEFlags,
1436 '@opflclear': self.parseTagOpEFlags,
1437 '@ophints': self.parseTagOpHints,
1438 '@opdisenum': self.parseTagOpDisEnum,
1439 '@opmincpu': self.parseTagOpMinCpu,
1440 '@opcpuid': self.parseTagOpCpuId,
1441 '@opgroup': self.parseTagOpGroup,
1442 '@opunused': self.parseTagOpUnusedInvalid,
1443 '@opinvalid': self.parseTagOpUnusedInvalid,
1444 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1445 '@optest': self.parseTagOpTest,
1446 '@optestign': self.parseTagOpTestIgnore,
1447 '@optestignore': self.parseTagOpTestIgnore,
1448 '@opcopytests': self.parseTagOpCopyTests,
1449 '@oponlytest': self.parseTagOpOnlyTest,
1450 '@opxcpttype': self.parseTagOpXcptType,
1451 '@opstats': self.parseTagOpStats,
1452 '@opfunction': self.parseTagOpFunction,
1453 '@opdone': self.parseTagOpDone,
1454 };
1455
1456 self.asErrors = [];
1457
1458 def raiseError(self, sMessage):
1459 """
1460 Raise error prefixed with the source and line number.
1461 """
1462 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1463
1464 def raiseCommentError(self, iLineInComment, sMessage):
1465 """
1466 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1467 """
1468 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1469
1470 def error(self, sMessage):
1471 """
1472 Adds an error.
1473 returns False;
1474 """
1475 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1476 return False;
1477
1478 def errorComment(self, iLineInComment, sMessage):
1479 """
1480 Adds a comment error.
1481 returns False;
1482 """
1483 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1484 return False;
1485
1486 def printErrors(self):
1487 """
1488 Print the errors to stderr.
1489 Returns number of errors.
1490 """
1491 if self.asErrors:
1492 sys.stderr.write(u''.join(self.asErrors));
1493 return len(self.asErrors);
1494
1495 def debug(self, sMessage):
1496 """
1497 For debugging.
1498 """
1499 if self.fDebug:
1500 print('debug: %s' % (sMessage,));
1501
1502
1503 def addInstruction(self, iLine = None):
1504 """
1505 Adds an instruction.
1506 """
1507 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1508 g_aoAllInstructions.append(oInstr);
1509 self.aoCurInstrs.append(oInstr);
1510 return oInstr;
1511
1512 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1513 """
1514 Derives the mnemonic and operands from a IEM stats base name like string.
1515 """
1516 if oInstr.sMnemonic is None:
1517 asWords = sStats.split('_');
1518 oInstr.sMnemonic = asWords[0].lower();
1519 if len(asWords) > 1 and not oInstr.aoOperands:
1520 for sType in asWords[1:]:
1521 if sType in g_kdOpTypes:
1522 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1523 else:
1524 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1525 return False;
1526 return True;
1527
1528 def doneInstructionOne(self, oInstr, iLine):
1529 """
1530 Complete the parsing by processing, validating and expanding raw inputs.
1531 """
1532 assert oInstr.iLineCompleted is None;
1533 oInstr.iLineCompleted = iLine;
1534
1535 #
1536 # Specified instructions.
1537 #
1538 if oInstr.cOpTags > 0:
1539 if oInstr.sStats is None:
1540 pass;
1541
1542 #
1543 # Unspecified legacy stuff. We generally only got a few things to go on here.
1544 # /** Opcode 0x0f 0x00 /0. */
1545 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1546 #
1547 else:
1548 #if oInstr.sRawOldOpcodes:
1549 #
1550 #if oInstr.sMnemonic:
1551 pass;
1552
1553 #
1554 # Common defaults.
1555 #
1556
1557 # Guess mnemonic and operands from stats if the former is missing.
1558 if oInstr.sMnemonic is None:
1559 if oInstr.sStats is not None:
1560 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1561 elif oInstr.sFunction is not None:
1562 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1563
1564 # Derive the disassembler op enum constant from the mnemonic.
1565 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1566 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1567
1568 # Derive the IEM statistics base name from mnemonic and operand types.
1569 if oInstr.sStats is None:
1570 if oInstr.sFunction is not None:
1571 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1572 elif oInstr.sMnemonic is not None:
1573 oInstr.sStats = oInstr.sMnemonic;
1574 for oOperand in oInstr.aoOperands:
1575 if oOperand.sType:
1576 oInstr.sStats += '_' + oOperand.sType;
1577
1578 # Derive the IEM function name from mnemonic and operand types.
1579 if oInstr.sFunction is None:
1580 if oInstr.sMnemonic is not None:
1581 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1582 for oOperand in oInstr.aoOperands:
1583 if oOperand.sType:
1584 oInstr.sFunction += '_' + oOperand.sType;
1585 elif oInstr.sStats:
1586 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1587
1588 # Derive encoding from operands.
1589 if oInstr.sEncoding is None:
1590 if not oInstr.aoOperands:
1591 if oInstr.fUnused and oInstr.sSubOpcode:
1592 oInstr.sEncoding = 'ModR/M';
1593 else:
1594 oInstr.sEncoding = 'fixed';
1595 elif oInstr.aoOperands[0].usesModRM():
1596 if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
1597 oInstr.sEncoding = 'ModR/M+VEX';
1598 else:
1599 oInstr.sEncoding = 'ModR/M';
1600
1601 #
1602 # Apply default map and then add the instruction to all it's groups.
1603 #
1604 if not oInstr.aoMaps:
1605 oInstr.aoMaps = [ self.oDefaultMap, ];
1606 for oMap in oInstr.aoMaps:
1607 oMap.aoInstructions.append(oInstr);
1608
1609 #
1610 # Check the opstat value and add it to the opstat indexed dictionary.
1611 #
1612 if oInstr.sStats:
1613 if oInstr.sStats not in g_dAllInstructionsByStat:
1614 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1615 else:
1616 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1617 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1618
1619 #
1620 # Add to function indexed dictionary. We allow multiple instructions per function.
1621 #
1622 if oInstr.sFunction:
1623 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1624 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1625 else:
1626 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1627
1628 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1629 return True;
1630
1631 def doneInstructions(self, iLineInComment = None):
1632 """
1633 Done with current instruction.
1634 """
1635 for oInstr in self.aoCurInstrs:
1636 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1637 if oInstr.fStub:
1638 self.cTotalStubs += 1;
1639
1640 self.cTotalInstr += len(self.aoCurInstrs);
1641
1642 self.sComment = '';
1643 self.aoCurInstrs = [];
1644 return True;
1645
1646 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
1647 """
1648 Sets the sAttrib of all current instruction to oValue. If fOverwrite
1649 is False, only None values and empty strings are replaced.
1650 """
1651 for oInstr in self.aoCurInstrs:
1652 if fOverwrite is not True:
1653 oOldValue = getattr(oInstr, sAttrib);
1654 if oOldValue is not None:
1655 continue;
1656 setattr(oInstr, sAttrib, oValue);
1657
1658 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
1659 """
1660 Sets the iEntry of the array sAttrib of all current instruction to oValue.
1661 If fOverwrite is False, only None values and empty strings are replaced.
1662 """
1663 for oInstr in self.aoCurInstrs:
1664 aoArray = getattr(oInstr, sAttrib);
1665 while len(aoArray) <= iEntry:
1666 aoArray.append(None);
1667 if fOverwrite is True or aoArray[iEntry] is None:
1668 aoArray[iEntry] = oValue;
1669
1670 def parseCommentOldOpcode(self, asLines):
1671 """ Deals with 'Opcode 0xff /4' like comments """
1672 asWords = asLines[0].split();
1673 if len(asWords) >= 2 \
1674 and asWords[0] == 'Opcode' \
1675 and ( asWords[1].startswith('0x')
1676 or asWords[1].startswith('0X')):
1677 asWords = asWords[:1];
1678 for iWord, sWord in enumerate(asWords):
1679 if sWord.startswith('0X'):
1680 sWord = '0x' + sWord[:2];
1681 asWords[iWord] = asWords;
1682 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
1683
1684 return False;
1685
1686 def ensureInstructionForOpTag(self, iTagLine):
1687 """ Ensure there is an instruction for the op-tag being parsed. """
1688 if not self.aoCurInstrs:
1689 self.addInstruction(self.iCommentLine + iTagLine);
1690 for oInstr in self.aoCurInstrs:
1691 oInstr.cOpTags += 1;
1692 if oInstr.cOpTags == 1:
1693 self.cTotalTagged += 1;
1694 return self.aoCurInstrs[-1];
1695
1696 @staticmethod
1697 def flattenSections(aasSections):
1698 """
1699 Flattens multiline sections into stripped single strings.
1700 Returns list of strings, on section per string.
1701 """
1702 asRet = [];
1703 for asLines in aasSections:
1704 if asLines:
1705 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1706 return asRet;
1707
1708 @staticmethod
1709 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1710 """
1711 Flattens sections into a simple stripped string with newlines as
1712 section breaks. The final section does not sport a trailing newline.
1713 """
1714 # Typical: One section with a single line.
1715 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1716 return aasSections[0][0].strip();
1717
1718 sRet = '';
1719 for iSection, asLines in enumerate(aasSections):
1720 if asLines:
1721 if iSection > 0:
1722 sRet += sSectionSep;
1723 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1724 return sRet;
1725
1726
1727
1728 ## @name Tag parsers
1729 ## @{
1730
1731 def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
1732 """
1733 Tag: \@opbrief
1734 Value: Text description, multiple sections, appended.
1735
1736 Brief description. If not given, it's the first sentence from @opdesc.
1737 """
1738 oInstr = self.ensureInstructionForOpTag(iTagLine);
1739
1740 # Flatten and validate the value.
1741 sBrief = self.flattenAllSections(aasSections);
1742 if not sBrief:
1743 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
1744 if sBrief[-1] != '.':
1745 sBrief = sBrief + '.';
1746 if len(sBrief) > 180:
1747 return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
1748 offDot = sBrief.find('.');
1749 while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
1750 offDot = sBrief.find('.', offDot + 1);
1751 if offDot >= 0 and offDot != len(sBrief) - 1:
1752 return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
1753
1754 # Update the instruction.
1755 if oInstr.sBrief is not None:
1756 return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
1757 % (sTag, oInstr.sBrief, sBrief,));
1758 _ = iEndLine;
1759 return True;
1760
1761 def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
1762 """
1763 Tag: \@opdesc
1764 Value: Text description, multiple sections, appended.
1765
1766 It is used to describe instructions.
1767 """
1768 oInstr = self.ensureInstructionForOpTag(iTagLine);
1769 if aasSections:
1770 oInstr.asDescSections.extend(self.flattenSections(aasSections));
1771 return True;
1772
1773 _ = sTag; _ = iEndLine;
1774 return True;
1775
1776 def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
1777 """
1778 Tag: @opmenmonic
1779 Value: mnemonic
1780
1781 The 'mnemonic' value must be a valid C identifier string. Because of
1782 prefixes, groups and whatnot, there times when the mnemonic isn't that
1783 of an actual assembler mnemonic.
1784 """
1785 oInstr = self.ensureInstructionForOpTag(iTagLine);
1786
1787 # Flatten and validate the value.
1788 sMnemonic = self.flattenAllSections(aasSections);
1789 if not self.oReMnemonic.match(sMnemonic):
1790 return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
1791 if oInstr.sMnemonic is not None:
1792 return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
1793 % (sTag, oInstr.sMnemonic, sMnemonic,));
1794 oInstr.sMnemonic = sMnemonic
1795
1796 _ = iEndLine;
1797 return True;
1798
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location recorded for the type (element 1 of its
    g_kdOpTypes entry) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag name.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sWhere = None;
        sType = asSplit[0];
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    # Note: this used to complain about an invalid "where" value, which
    #       pointed the user at the wrong half of the specification.
    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                 % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                 % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The list
    # is padded with None so operands may be specified out of order.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    oExisting = oInstr.aoOperands[idxOp];
    if oExisting is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                 % ( sTag, oExisting.sWhere, oExisting.sType, sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);
    return True;
1848
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each name must be a key of g_dInstructionMaps.  A map the instruction
    already belongs to is an error; a name repeated within the value is
    reported but does not fail the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  The emptiness check must be
    # done on the string: ''.split(',') yields [''] and is thus never empty.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if not sFlattened.strip():
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = sFlattened.split(',');
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                     % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same name given twice in this tag; complain but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    return True;
1883
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' as the
    string 'none'.  Anything else gets '0x' prepended when it is exactly
    two characters long and must then pass _isValidOpcodeByte().  All
    values but 'n/a' must be keys of g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  An empty value must be rejected here
    # as indexing the word list below would otherwise raise IndexError.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: prefix value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;
    return True;
1921
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: | mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' digit must be in the range 0 thru 7, as the ModR/M reg field
    is three bits wide.  Anything that is not one of the /reg forms must
    pass _isValidOpcodeByte().

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  The first condition guarantees at
    # least two characters, so indexing the last one is safe.
    sOpcode = self.flattenAllSections(aasSections);
    if sOpcode[:-1] in ('/', '11/', '!11/') and sOpcode[-1] in '01234567':
        pass;
    elif not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;
    return True;
1951
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; what gets stored in
    oInstr.sSubOpcode is element 0 of the matching table entry.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Look the flattened value up in the sub-opcode table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Store it unless an earlier tag already did.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                             % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
1977
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    A value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte() (checked in that
    order).

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted = sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Store it unless an earlier tag already did.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                             % ( sTag, oInstr.sEncoding, sEncoding,));
2004
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags uses the tag name as key to find the name of the
## instruction attribute it reads (getattr) and sets (setattr).
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
2013
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  Names only
    matching after stripping whitespace are replaced by the stripped form.

    The resulting list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag, provided it is still None.

    Returns True on success, False if any flag was rejected, otherwise the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # Go thru all of them so every bad flag gets reported.
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them.
    sAttrName = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrName);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrName, asFlags);
    return True;
2045
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    stands for no hints.  Valid hints are added as keys to oInstr.dHints.
    A hint that is already present is reported but does not fail the tag.

    Returns True on success and False if any hint was rejected.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Splitting on whitespace leaves nothing to strip off the words, so they
    # can be checked as-is.  (The old code had a strip-and-retry branch that
    # assigned into the string rather than the list.)
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # Go thru all of them so every bad hint gets reported.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    return True;
2079
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    When several words are given, the problem is reported and the first
    word is used.  The word must match self.oReDisEnum and is stored in
    oInstr.sDisEnum, unless that is already set.

    Returns True on success, False if the value is empty, otherwise the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Store it unless an earlier tag already did.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2108
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key of g_kdCpuNames.  When several words are given,
    the problem is reported and the first word is used.  Repeating the
    value already set is fine; a different value is reported and does not
    replace the existing one.

    Returns True on success (including the reported-only cases above),
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Must bail out here, asCpus[0] below would raise IndexError.
        return self.errorComment(iTagLine, '%s: CPU name required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Note that the validation above must not touch the attribute,
    # or the conflict check here would never be able to trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    return True;
2138
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys from
    g_kdCpuIdFlags; the single word 'none' (any case) stands for an empty
    list.  Valid names are appended to oInstr.asCpuIds.  A name that is
    already present is reported but does not fail the tag.

    Returns True on success and False if any name was rejected.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Splitting on whitespace leaves nothing to strip off the words, so they
    # can be checked as-is.  (The old code had a strip-and-retry branch that
    # assigned into the string rather than the list.)
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Go thru all of them so every bad name gets reported.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    return True;
2172
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is required.  It is stored
    in oInstr.sGroup unless that is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on there being a single one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                 % (sTag, sGroup, self.oReGroupName.pattern));

    # Store it unless an earlier tag already did.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
2198
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one word that is a key of g_kdInvalidStyles is required.  It is
    stored in oInstr.sInvalidStyle unless that is already set.  The
    \@opunused and \@opinvalid tags additionally set oInstr.fUnused and
    oInstr.fInvalid respectively.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on there being a single one.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                 % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be used per instruction.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags imply a flag on top of the style.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);
    return True;
2236
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value describes one test.  A test is only added
    to oInstr.aoTests if all of its selectors, inputs and outputs parsed
    fine; failures are reported via self.errorComment().

    Always returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs = [];
        asOutputs = [];
        asCur = asOutputs;
        fRc = True;
        asWords = sFlatSection.split();

        # The words are walked back to front: outputs first, then inputs
        # after seeing "->", then selectors after seeing "/".  Which list is
        # current must be checked by identity; comparing by value cannot tell
        # the lists apart when they have equal content (e.g. are all empty).
        for sWord in reversed(asWords):
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands which expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional ("value[:type]"), the field supplies the default.
                        asSplit = sValueType.split(':', 1);
                        sValue = asSplit[0];
                        sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                         if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break; # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    return True;
2381
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  None
    of the arguments are used.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2393
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid and duplicate references
    are reported and skipped.

    Returns True unless the value is empty, in which case the result of
    self.errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                          % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2422
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is appended to g_aoOnlyTestInstructions unless it is
    already in there.

    See also \@optestignore.

    Returns True on success, otherwise (a value was given) the result of
    self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag doesn't take a value, so complain if there is one.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2445
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word that is a key of g_kdXcptTypes is required.  It is
    stored in oInstr.sXcptType unless that is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on there being a single one.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s  (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Store it unless an earlier tag already did.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sType,));
2472
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The flattened value must match self.oReFunctionName and is stored in
    oInstr.sFunction unless that is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sFunction):
        # Store it unless an earlier tag already did.
        if oInstr.sFunction is None:
            oInstr.sFunction = sFunction;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sFunction,));

    return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                             % (sTag, sFunction, self.oReFunctionName.pattern));
2500
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The flattened value must match self.oReStatsName and is stored in
    oInstr.sStats unless that is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sStats):
        # Store it unless an earlier tag already did.
        if oInstr.sStats is None:
            oInstr.sStats = sStats;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sStats,));

    return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                             % (sTag, sStats, self.oReStatsName.pattern));
2528
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when no value is given,
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    sFlattened = self.flattenAllSections(aasSections);
    if sFlattened == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,));
2541
2542 ## @}
2543
2544
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is split into lines with leading whitespace and asterisks
    removed.  Lines starting with '@' begin a new tag, and the lines that
    follow up to the next tag form its value, with empty lines separating
    sections.  Text preceding the first tag belongs to the pseudo tag
    '@default'.  Each completed tag is passed to its handler in
    self.dTagHandlers; problems are reported via self.errorComment().

    self.iCommentLine is advanced past leading empty lines.

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore ignored, otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    # (The check above guarantees at least one non-empty line.)
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def dispatchTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Handles one completed tag.

        Returns a (fIsOpTag, sDefaultText) pair: fIsOpTag is True if a tag
        handler was invoked, and sDefaultText is the flattened text of the
        '@default' pseudo tag and None for all other tags.
        """
        # Drop the empty section a trailing blank line leaves behind.
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (True, None);

        # The encoding hint must come before the generic '@op' check, or it
        # would never be given for '@opencoding'.
        if sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (False, self.flattenAllSections(aasTagSections));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        return (False, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0;
    sFlatDefault = None;
    sCurTag = '@default';
    iCurTagLine = 0;
    asCurSection = [];
    aasSections = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # Blank line after some text: start a new section.
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag.
        #
        (fIsOpTag, sDefault) = dispatchTag(sCurTag, aasSections, iCurTagLine, iLine);
        if fIsOpTag:
            cOpTags += 1;
        if sDefault is not None:
            sFlatDefault = sDefault;

        #
        # New tag.
        #
        asSplit = sLine.split(None, 1);
        sCurTag = asSplit[0].lower();
        if len(asSplit) > 1:
            asCurSection = [asSplit[1],];
        else:
            asCurSection = [];
        aasSections = [asCurSection, ];
        iCurTagLine = iLine;

    #
    # Process the final tag.  This goes thru the same code as the other
    # tags, so that it gets the same "did you mean" hints.
    #
    (fIsOpTag, sDefault) = dispatchTag(sCurTag, aasSections, iCurTagLine, iLine);
    if fIsOpTag:
        cOpTags += 1;
    if sDefault is not None:
        sFlatDefault = sDefault;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2645
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    The arguments are split on commas at the outermost parenthesis level
    and stripped of surrounding whitespace.  An invocation without any
    arguments, "FOO()", yields one empty argument string.  If the closing
    parenthesis isn't found in sInvocation, lines from self.asLines are
    appended to it, starting at self.iLine, and the returned offset is then
    relative to the extended string.

    On an invalid macro name or when running out of lines, the result of
    self.error() is returned instead of a tuple.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine = self.iLine;
    cDepth = 1;
    off = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines until there is a character at 'off'.  This has
        # to be a loop as the line appended could be an empty string.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch in (',', ')'):
            # Only separators at the outermost level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2687
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation if the next non-blank character is an opening
    parenthesis.  In the found case the offset returned by
    parseMacroInvocation is adjusted to be relative to sCode.
    """
    offHit = sCode.find(sMacro);
    if offHit >= 0:
        # startswith() is used since there may be nothing at all after the
        # macro name, in which case indexing would raise IndexError.
        sFollowing = sCode[offHit + len(sMacro):].lstrip();
        if sFollowing.startswith('('):
            offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2697
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns the argument list (macro name first) when sMacro is invoked in
    sCode, None when it is not.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
2703
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in order and stops at the first one that is
    invoked in sCode.

    Returns same as findAndParseMacroInvocation: the argument list of that
    invocation, or None if none of the macros is invoked.
    """
    # The generator is lazy, so macros following the first hit are not tried.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oCandidates if asArgs is not None), None);
2713
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added first if the list is
    empty).  Problems are reported via self.error() and processing goes on.

    Parameters:
        sMacro      - The macro name, used in error messages.
        sStats      - The a_Stats argument.
        sAsm        - The a_szMnemonic argument (currently unused).
        sForm       - The a_Form argument, a key in g_kdIemForms.
        sUpper      - The a_Upper argument.
        sLower      - The a_Lower argument.
        sDisHints   - The a_fDisHints argument: '0' or '|' separated DISOPTYPE_XXX.
        sIemHints   - The a_fIemHints argument: '0' or '|' separated IEMOPHINT_XXX.
        asOperands  - The a_OpN arguments (g_kdOpTypes keys), possibly empty.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            # Not given by an @opN tag, so the macro defines the operand.
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry layout as used here: [0] = encoding, [1] = operand locations or None.
        oForm = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oForm[0];
        elif oForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oForm, sForm));

        # Check the parameter locations for the encoding.
        if oForm[1] is not None:
            if len(oForm[1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(oForm[1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(oForm[1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    self._mergeMnemonicMacroHints(sMacro, oInstr, sDisHints, 'DISOPTYPE_', 'a_fDisHints');
    self._mergeMnemonicMacroHints(sMacro, oInstr, sIemHints, 'IEMOPHINT_', 'a_fIemHints');

    _ = sAsm;
    return True;

def _mergeMnemonicMacroHints(self, sMacro, oInstr, sHints, sPrefix, sParam):
    """
    Merges the '|' separated hint flags in sHints into oInstr.dHints.

    Each flag must start with sPrefix and, once the prefix is stripped and
    the rest lowercased, be a key in g_kdHints; a plain '0' is ignored.
    Anything else is reported via self.error(), naming the macro parameter
    sParam.
    """
    for sHint in sHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith(sPrefix):
            sShortHint = sHint[len(sPrefix):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
        elif sHint != '0':
            self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));
2831
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros lack the a_Stats and a_szMnemonic arguments of the EX
    variants, so both are derived from the mnemonic and the operands before
    the work is handed to workerIemOpMnemonicEx, whose result is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats += '_' + '_'.join(asOperands);   # E.g. 'mov_Gv_Ev'.
        sAsm   += ' ' + ','.join(asOperands);   # E.g. 'mov Gv,Ev'.
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2841
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Looks for the FNIEMOP_XXX decoder function macros, IEMOP_MNEMONIC, and
    the IEMOP_MNEMONIC[0-4]EX and IEMOP_MNEMONIC[0-4] macros in sCode (a code
    fragment, i.e. a line or the part of it outside comments).

    Returns True if a FNIEMOP_XXX macro was processed, otherwise False.
    """
    # No point scanning code without an opening parenthesis after some text.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];
        fStub     = asArgs[0].find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # A stub has no body, so the instructions are completed right here.
            self.doneInstructions();
        return True;

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None:
        if len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1..a_OpN, a_fDisHints, a_fIemHints)
    for cOperands in range(5):
        sMacro = 'IEMOP_MNEMONIC%uEX' % (cOperands,);
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is None:
            continue;
        cMinArgs = 8 + cOperands;   # Macro name + 5 fixed + N operands + 2 hint arguments.
        if len(asArgs) < cMinArgs:
            self.error('%s: expected %u arguments, found %u' % (sMacro, cMinArgs - 1, len(asArgs) - 1,));
            continue;
        self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                   asArgs[6 + cOperands], asArgs[7 + cOperands], asArgs[6 : 6 + cOperands]);

    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, a_Op1..a_OpN, a_fDisHints, a_fIemHints)
    for cOperands in range(5):
        sMacro = 'IEMOP_MNEMONIC%u' % (cOperands,);
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is None:
            continue;
        cMinArgs = 6 + cOperands;   # Macro name + 3 fixed + N operands + 2 hint arguments.
        if len(asArgs) < cMinArgs:
            self.error('%s: expected %u arguments, found %u' % (sMacro, cMinArgs - 1, len(asArgs) - 1,));
            continue;
        self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                 asArgs[4 + cOperands], asArgs[5 + cOperands], asArgs[4 : 4 + cOperands]);

    return False;
2935
2936
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine and splits each line into code
    and /* ... */ comment parts.  Code parts are passed to
    checkCodeForMacro(), each completed comment (collected in self.sComment)
    to parseComment(), and a line starting with '}' outside a comment
    completes the current instructions.

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    # Raised explicitly: a plain assert is stripped by -O and
                    # this loop would then never advance offLine.
                    raise AssertionError('Invalid parser state: %s' % (self.iState,));

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than [0] so an empty line cannot raise IndexError.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
3000
3001
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the path of the file to read and sDefaultMap is handed on to
    SimpleParser together with the lines of the file.

    Returns the number of errors reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A
    ParserException from the parser is printed and then re-raised; any other
    exception from the parser propagates unchanged.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # Closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        cErrors = SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;

    return cErrors;
3032
3033
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each entry in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.  The tests of every instruction found are
    appended to the aoTests of the requesting instruction.  A reference that
    cannot be resolved, or that resolves to the requesting instruction
    itself, is an error.

    Returns the number of errors; the messages go to sys.stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # A stats name identifies one instruction, a function name possibly several.
            oByStat     = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3061
3062
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (there is nothing here that can fail).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3074
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  Once they
    are parsed, the test copying and the only-test filtering are applied.

    Returns True if no errors were counted.  Otherwise the process is
    terminated via sys.exit(1); exceptions from the parsing propagate.
    """
    sSrcDir  = os.path.dirname(os.path.abspath(__file__));
    atInputs = [
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    # Parse everything first, the post-processing steps work on the complete set.
    cErrors = 0;
    for sDefaultMap, sName in atInputs:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        sys.exit(1);
    return True;
3095
3096
3097
# Parse the instruction files as soon as this module is loaded, whether it is
# imported or run as a script.  Note that __parseAll() calls sys.exit(1) when
# errors were counted.
# NOTE(review): presumably this is what fills the global tables used by
# generateDisassemblerTables() below - confirm.
__parseAll();
3099
3100
3101#
3102# Generators (may perhaps move later).
3103#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Walks g_dInstructionMaps ordered by encoding and lead opcodes, formats a
    'const DISOPCODE <table>[]' initializer with one OP()/OPVEX() entry per
    instruction, and writes it to oDstFile (any object with a write()
    method).  Only the first map in that order is written for now, see the
    break at the end of the loop.

    Note that the oDstFile default is the sys.stdout object in effect when
    this function was defined.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Target start offset of each column in an output line (see the
        # formatting loop further down).
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start each group of 16 entries with a comment giving the high
            # nibble, separating the groups by a blank line.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Several instructions in one slot: only a placeholder is emitted.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # OP() takes up to three operands, OPVEX() four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                # Entry [2] of a g_kdOpTypes value is the operand text; it is
                # uppercased unless it starts with '%'.
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the parser columns begin, so that
                # len(asColumns) - iStart is the number emitted so far.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Operands already covered by a parser column above are
                # skipped; entry [0] of a g_kdOpTypes value is the parser
                # index name.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad with '0,' up to cMaxOperands parser columns.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad with OP_PARM_NONE up to cMaxOperands parameter columns.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only the hints whose g_kdHints define starts with DISOPTYPE_
                # are emitted, OR'ed together in sorted hint order.  The ')'
                # closes the OP()/OPVEX() invocation.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its aoffColumns offset; when
                # the line is already at or past it, a single space is used.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3267
# When run as a script, write the disassembler tables to standard output
# (the default destination of generateDisassemblerTables).
if __name__ == '__main__':
    generateDisassemblerTables();
3270
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette