VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66462

Last change on this file since 66462 was 66462, checked in by vboxsync, 8 years ago

IEM,bs3-cpu-generated-1: Made the current testcases pass on AMD.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 140.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66462 2017-04-06 13:38:13Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66462 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## Constants and values for EFLAGS.
## Maps the X86_EFL_XXX constant names to their mask values.  TestTypeEflags.get()
## looks values up here using the constant names found in g_kdEFlagsMnemonics.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to an X86_EFL_XXX constant name (a key in
## g_kdX86EFlagsConstants).  A leading '!' on the constant name means the
## mnemonic stands for the flag being clear rather than set.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    # NOTE(review): common debugger notation uses PE (parity even) for PF=1 and
    #               PO (parity odd) for PF=0, which is the opposite of the
    #               mapping below.  TestTypeEflags.kdZeroValueFlags treats 'pe'
    #               as the clear form, so the file is self-consistent - confirm
    #               against the testcases before changing anything here.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Maps the X86_CR0_XXX constant names to their mask values.  Used by the 'cr0'
## test type (TestTypeFromDict), which looks up 'X86_CR0_' + the upper-cased flag name.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':          0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':          0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':          0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':          0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':          0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':          0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':          0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':          0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':          0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':          0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':          0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## Maps the X86_CR4_XXX constant names to their mask values.  Used by the 'cr4'
## test type (TestTypeFromDict), which looks up 'X86_CR4_' + the upper-cased flag name.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX constant names to their mask values.  Used by the 'xcr0'
## test type (TestTypeFromDict), which looks up 'XSAVE_C_' + the upper-cased flag name.
## The last two entries are multi-bit convenience masks rather than single components.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
##
## The keys are the valid operand location names; they are what field 1 of the
## g_kdOpTypes entries and the sWhere lists in g_kdIemForms refer to.  The
## values are all empty lists here.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
201## \@op[1-4] types
202##
203## Value fields:
204## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
205## - 1: the location (g_kdOpLocations).
206## - 2: disassembler format string version of the type.
207## - 3: disassembler OP_PARAM_XXX (XXX only).
208##
209## Note! See the A.2.1 in SDM vol 2 for the type names.
210g_kdOpTypes = {
211 # Fixed addresses
212 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),
213
214 # ModR/M.rm
215 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
216 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
217 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
218 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
219 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
220 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
221 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
222 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
223 'WqZxReg': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
224
225 # ModR/M.rm - register only.
226 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
227 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
228
229 # ModR/M.rm - memory only.
230 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
231 'MbRO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
232 'MdRO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
233 'MdWO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
234 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
235 'MRO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
236 'MRW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
237
238 # ModR/M.reg
239 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
240 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
241 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
242 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
243 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
244 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
245 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
246 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
247 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
248
249 # Immediate values.
250 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
251 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
252 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
253 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
254 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
255 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword
256
257 # Address operands (no ModR/M).
258 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
259 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),
260
261 # Relative jump targets
262 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
263 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),
264
265 # DS:rSI
266 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
267 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
268 # ES:rDI
269 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
270 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),
271
272 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),
273
274 # Fixed registers.
275 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
276 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
277 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
278 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
279 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
280 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
281 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
282 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
283};
284
285# IDX_ParseFixedReg
286# IDX_ParseVexDest
287
288
## IEMFORM_XXX mappings.
##
## Each value is a tuple of the encoding name (a key in g_kdEncodings) and a
## list with the location of each operand (keys in g_kdOpLocations).  The
## 'FIXED' form has None instead of a location list.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
317
## \@oppfx values.
## Only the keys matter in this table; the values are all empty lists.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
325
## Special \@opcode tag values.
## Only the keys matter in this table; the values are all empty lists.  The
## 'mr/reg' variants are what the g_kdSubOpcodes entries resolve to.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
335
## Special \@opcodesub tag values.
## Each value is a one item list holding either None or the g_kdSpecialOpcodes
## key the tag value stands for; the short '11' and '!11' forms are aliases.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
344
## Valid values for \@openc
## Each value is a one item list holding the BS3CG1ENC_XXX constant name for the
## encoding, or None when there is none ('prefix').  The first three keys are
## the encoding names used by g_kdIemForms.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
352
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys matter in this table; the values are all empty lists.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
361
## CPU names.
## Only the keys matter in this table; the values are all empty tuples.
## NOTE(review): presumably the valid values of a minimum-CPU tag - the user of
##               this table is not visible in this part of the file, confirm.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
369
## \@opcpuid
##
## Maps the lower case feature name used with the tag to the name of the
## corresponding X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme':         'X86_CPUID_FEATURE_EDX_VME',
    'tsc':         'X86_CPUID_FEATURE_EDX_TSC',
    'msr':         'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':         'X86_CPUID_FEATURE_EDX_CX8',
    'sep':         'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':        'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':       'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':  'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':         'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':        'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':         'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':        'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':        'X86_CPUID_FEATURE_ECX_SSE3',
    # pclmul, monitor and smx used to name the neighbouring leaf 1 ECX bits
    # (DTES64, CPLDS and TM2 respectively), i.e. unrelated features.
    'pclmul':      'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':     'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':         'X86_CPUID_FEATURE_ECX_VMX',
    'smx':         'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':       'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':         'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':        'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':        'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':       'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':      'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':         'X86_CPUID_FEATURE_ECX_AES',
    'xsave':       'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':         'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':        'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':      'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':       'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':       'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':         'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':        'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':         'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':       'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':    'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':         'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':        'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
414
## \@ophints values.
##
## Maps the hint name to the name of the corresponding DISOPTYPE_XXX constant.
## The last two hints map to an empty string, i.e. they have no DISOPTYPE_XXX
## constant in this table.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
449
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys matter in this table; the values are all empty lists.
g_kdXcptTypes = {
    'none':     [],
    # Plain type numbers.
    # NOTE(review): presumably the SSE/AVX exception types of SDMv2 2.4 - confirm.
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    # 'E' prefixed types.
    # NOTE(review): presumably the EVEX exception classes of SDMv2 2.7 - confirm.
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
483
484
485def _isValidOpcodeByte(sOpcode):
486 """
487 Checks if sOpcode is a valid lower case opcode byte.
488 Returns true/false.
489 """
490 if len(sOpcode) == 4:
491 if sOpcode[:2] == '0x':
492 if sOpcode[2] in '0123456789abcdef':
493 if sOpcode[3] in '0123456789abcdef':
494 return True;
495 return False;
496
497
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys are used by this class.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 1
        'vex2':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 2
        'vex3':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 3
        'xop8':     [], ##< XOP prefix with mmmmm (map select) = 8
        'xop9':     [], ##< XOP prefix with mmmmm (map select) = 9
        'xop10':    [], ##< XOP prefix with mmmmm (map select) = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty instruction map.

        sName is the map name (getDisasTableName derives the table name from
        it), asLeadOpcodes the lead opcode bytes as '0xNN' strings (None means
        none), sSelector a kdSelectors key, sEncoding a kdEncodings key, and
        sDisParse an optional IDX_ParseXXX name.  Invalid arguments trigger
        assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of the map.  Asserts that the byte is consistent with
        selectors that require a specific modrm.mod value.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # modrm.mod is the top two bits, so mod == 0y11 means (byte & 0xc0) == 0xc0.
        # (Comparing with 0xc, as was done before, can never be equal.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        the map name, capitalized and joined; 'm', 'r' and two digit lower
        case hex words keep their underscore.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
638
639
class TestType(object):
    """
    Test value type.

    The base class handles integer like values.  The fUnsigned constructor
    parameter sets the default for whether values are zero or sign extended;
    a value prefixed by '+' or '-' is always marked for sign extension.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32]; # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class always returns the single entry form.  The bytearray holds the
        value with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts the '0x' prefix by one.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offPrefix   = 1 if fHasSign else 0;
        fHex        = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Convert the string to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  A negative value is formatted as -n - 1 and
        # then has every digit inverted, giving its two's complement digits.
        fNegative = iValue < 0;
        sHex = hex((-iValue - 1) if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits, or a multiple of the
        # largest one when the value is too big for any of them.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad on the left with zeros, or with 'f' for negative values.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('0' if iValue >= 0 else 'f') * cNeeded) + sHex;

        # Convert to a bytearray, walking the digit pairs from the end so the
        # least significant byte comes first.
        abValue = bytearray([int(sHex[off : off + 2], 16) for off in range(len(sHex) - 2, -1, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for the base class.
        """
        _ = sValue;
        return False;
764
765
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    A value is a comma separated list of the flag mnemonics found in
    g_kdEFlagsMnemonics.
    """

    ## The mnemonics that stand for a flag being clear, i.e. the ones mapped to
    ## a '!' prefixed constant in g_kdEFlagsMnemonics.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics to byte values.

        Returns a one entry tuple with the mask of flags to set when no
        mnemonic asks for a flag to be cleared.  Otherwise returns a pair: the
        (uninverted) mask of flags to clear, then the mask of flags to set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue yields an AND+OR pair, i.e. whether any of its
        mnemonics asks for a flag to be cleared.
        """
        # Compare whole mnemonics, the same way get() splits the value.  A
        # substring search would for instance find 'pl' inside 'iopl'.
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
801
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants, such as CR0, CR4 and XCR0.

    A value is a comma separated list of flag names.  Each name is upper
    cased and prefixed with sConstantPrefix to form the key looked up in
    kdConstantsAndValues.
    """

    ## Not referenced by this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of the listed flags and returns the result
        the same way TestType.get does.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
822
823
824class TestInOut(object):
825 """
826 One input or output state modifier.
827
828 This should be thought as values to modify BS3REGCTX and extended (needs
829 to be structured) state.
830 """
831 ## Assigned operators.
832 kasOperators = [
833 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
834 '&~=',
835 '&=',
836 '|=',
837 '='
838 ];
839 ## Types
840 kdTypes = {
841 'uint': TestType('uint', fUnsigned = True),
842 'int': TestType('int'),
843 'efl': TestTypeEflags('efl'),
844 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
845 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
846 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
847 };
848 ## CPU context fields.
849 kdFields = {
850 # name: ( default type, [both|input|output], )
851 # Operands.
852 'op1': ( 'uint', 'both', ), ## \@op1
853 'op2': ( 'uint', 'both', ), ## \@op2
854 'op3': ( 'uint', 'both', ), ## \@op3
855 'op4': ( 'uint', 'both', ), ## \@op4
856 # Flags.
857 'efl': ( 'efl', 'both', ),
858 'efl_undef': ( 'uint', 'output', ),
859 # 8-bit GPRs.
860 'al': ( 'uint', 'both', ),
861 'cl': ( 'uint', 'both', ),
862 'dl': ( 'uint', 'both', ),
863 'bl': ( 'uint', 'both', ),
864 'ah': ( 'uint', 'both', ),
865 'ch': ( 'uint', 'both', ),
866 'dh': ( 'uint', 'both', ),
867 'bh': ( 'uint', 'both', ),
868 'r8l': ( 'uint', 'both', ),
869 'r9l': ( 'uint', 'both', ),
870 'r10l': ( 'uint', 'both', ),
871 'r11l': ( 'uint', 'both', ),
872 'r12l': ( 'uint', 'both', ),
873 'r13l': ( 'uint', 'both', ),
874 'r14l': ( 'uint', 'both', ),
875 'r15l': ( 'uint', 'both', ),
876 # 16-bit GPRs.
877 'ax': ( 'uint', 'both', ),
878 'dx': ( 'uint', 'both', ),
879 'cx': ( 'uint', 'both', ),
880 'bx': ( 'uint', 'both', ),
881 'sp': ( 'uint', 'both', ),
882 'bp': ( 'uint', 'both', ),
883 'si': ( 'uint', 'both', ),
884 'di': ( 'uint', 'both', ),
885 'r8w': ( 'uint', 'both', ),
886 'r9w': ( 'uint', 'both', ),
887 'r10w': ( 'uint', 'both', ),
888 'r11w': ( 'uint', 'both', ),
889 'r12w': ( 'uint', 'both', ),
890 'r13w': ( 'uint', 'both', ),
891 'r14w': ( 'uint', 'both', ),
892 'r15w': ( 'uint', 'both', ),
893 # 32-bit GPRs.
894 'eax': ( 'uint', 'both', ),
895 'edx': ( 'uint', 'both', ),
896 'ecx': ( 'uint', 'both', ),
897 'ebx': ( 'uint', 'both', ),
898 'esp': ( 'uint', 'both', ),
899 'ebp': ( 'uint', 'both', ),
900 'esi': ( 'uint', 'both', ),
901 'edi': ( 'uint', 'both', ),
902 'r8d': ( 'uint', 'both', ),
903 'r9d': ( 'uint', 'both', ),
904 'r10d': ( 'uint', 'both', ),
905 'r11d': ( 'uint', 'both', ),
906 'r12d': ( 'uint', 'both', ),
907 'r13d': ( 'uint', 'both', ),
908 'r14d': ( 'uint', 'both', ),
909 'r15d': ( 'uint', 'both', ),
910 # 64-bit GPRs.
911 'rax': ( 'uint', 'both', ),
912 'rdx': ( 'uint', 'both', ),
913 'rcx': ( 'uint', 'both', ),
914 'rbx': ( 'uint', 'both', ),
915 'rsp': ( 'uint', 'both', ),
916 'rbp': ( 'uint', 'both', ),
917 'rsi': ( 'uint', 'both', ),
918 'rdi': ( 'uint', 'both', ),
919 'r8': ( 'uint', 'both', ),
920 'r9': ( 'uint', 'both', ),
921 'r10': ( 'uint', 'both', ),
922 'r11': ( 'uint', 'both', ),
923 'r12': ( 'uint', 'both', ),
924 'r13': ( 'uint', 'both', ),
925 'r14': ( 'uint', 'both', ),
926 'r15': ( 'uint', 'both', ),
927 # 16-bit, 32-bit or 64-bit registers according to operand size.
928 'oz.rax': ( 'uint', 'both', ),
929 'oz.rdx': ( 'uint', 'both', ),
930 'oz.rcx': ( 'uint', 'both', ),
931 'oz.rbx': ( 'uint', 'both', ),
932 'oz.rsp': ( 'uint', 'both', ),
933 'oz.rbp': ( 'uint', 'both', ),
934 'oz.rsi': ( 'uint', 'both', ),
935 'oz.rdi': ( 'uint', 'both', ),
936 'oz.r8': ( 'uint', 'both', ),
937 'oz.r9': ( 'uint', 'both', ),
938 'oz.r10': ( 'uint', 'both', ),
939 'oz.r11': ( 'uint', 'both', ),
940 'oz.r12': ( 'uint', 'both', ),
941 'oz.r13': ( 'uint', 'both', ),
942 'oz.r14': ( 'uint', 'both', ),
943 'oz.r15': ( 'uint', 'both', ),
944 # Control registers.
945 'cr0': ( 'cr0', 'both', ),
946 'cr4': ( 'cr4', 'both', ),
947 'xcr0': ( 'xcr0', 'both', ),
948 # FPU Registers
949 'fcw': ( 'uint', 'both', ),
950 'fsw': ( 'uint', 'both', ),
951 'ftw': ( 'uint', 'both', ),
952 'fop': ( 'uint', 'both', ),
953 'fpuip': ( 'uint', 'both', ),
954 'fpucs': ( 'uint', 'both', ),
955 'fpudp': ( 'uint', 'both', ),
956 'fpuds': ( 'uint', 'both', ),
957 'mxcsr': ( 'uint', 'both', ),
958 'st0': ( 'uint', 'both', ),
959 'st1': ( 'uint', 'both', ),
960 'st2': ( 'uint', 'both', ),
961 'st3': ( 'uint', 'both', ),
962 'st4': ( 'uint', 'both', ),
963 'st5': ( 'uint', 'both', ),
964 'st6': ( 'uint', 'both', ),
965 'st7': ( 'uint', 'both', ),
966 # MMX registers.
967 'mm0': ( 'uint', 'both', ),
968 'mm1': ( 'uint', 'both', ),
969 'mm2': ( 'uint', 'both', ),
970 'mm3': ( 'uint', 'both', ),
971 'mm4': ( 'uint', 'both', ),
972 'mm5': ( 'uint', 'both', ),
973 'mm6': ( 'uint', 'both', ),
974 'mm7': ( 'uint', 'both', ),
975 # SSE registers.
976 'xmm0': ( 'uint', 'both', ),
977 'xmm1': ( 'uint', 'both', ),
978 'xmm2': ( 'uint', 'both', ),
979 'xmm3': ( 'uint', 'both', ),
980 'xmm4': ( 'uint', 'both', ),
981 'xmm5': ( 'uint', 'both', ),
982 'xmm6': ( 'uint', 'both', ),
983 'xmm7': ( 'uint', 'both', ),
984 'xmm8': ( 'uint', 'both', ),
985 'xmm9': ( 'uint', 'both', ),
986 'xmm10': ( 'uint', 'both', ),
987 'xmm11': ( 'uint', 'both', ),
988 'xmm12': ( 'uint', 'both', ),
989 'xmm13': ( 'uint', 'both', ),
990 'xmm14': ( 'uint', 'both', ),
991 'xmm15': ( 'uint', 'both', ),
992 'xmm0.lo': ( 'uint', 'both', ),
993 'xmm1.lo': ( 'uint', 'both', ),
994 'xmm2.lo': ( 'uint', 'both', ),
995 'xmm3.lo': ( 'uint', 'both', ),
996 'xmm4.lo': ( 'uint', 'both', ),
997 'xmm5.lo': ( 'uint', 'both', ),
998 'xmm6.lo': ( 'uint', 'both', ),
999 'xmm7.lo': ( 'uint', 'both', ),
1000 'xmm8.lo': ( 'uint', 'both', ),
1001 'xmm9.lo': ( 'uint', 'both', ),
1002 'xmm10.lo': ( 'uint', 'both', ),
1003 'xmm11.lo': ( 'uint', 'both', ),
1004 'xmm12.lo': ( 'uint', 'both', ),
1005 'xmm13.lo': ( 'uint', 'both', ),
1006 'xmm14.lo': ( 'uint', 'both', ),
1007 'xmm15.lo': ( 'uint', 'both', ),
1008 'xmm0.hi': ( 'uint', 'both', ),
1009 'xmm1.hi': ( 'uint', 'both', ),
1010 'xmm2.hi': ( 'uint', 'both', ),
1011 'xmm3.hi': ( 'uint', 'both', ),
1012 'xmm4.hi': ( 'uint', 'both', ),
1013 'xmm5.hi': ( 'uint', 'both', ),
1014 'xmm6.hi': ( 'uint', 'both', ),
1015 'xmm7.hi': ( 'uint', 'both', ),
1016 'xmm8.hi': ( 'uint', 'both', ),
1017 'xmm9.hi': ( 'uint', 'both', ),
1018 'xmm10.hi': ( 'uint', 'both', ),
1019 'xmm11.hi': ( 'uint', 'both', ),
1020 'xmm12.hi': ( 'uint', 'both', ),
1021 'xmm13.hi': ( 'uint', 'both', ),
1022 'xmm14.hi': ( 'uint', 'both', ),
1023 'xmm15.hi': ( 'uint', 'both', ),
1024 'xmm0.lo.zx': ( 'uint', 'both', ),
1025 'xmm1.lo.zx': ( 'uint', 'both', ),
1026 'xmm2.lo.zx': ( 'uint', 'both', ),
1027 'xmm3.lo.zx': ( 'uint', 'both', ),
1028 'xmm4.lo.zx': ( 'uint', 'both', ),
1029 'xmm5.lo.zx': ( 'uint', 'both', ),
1030 'xmm6.lo.zx': ( 'uint', 'both', ),
1031 'xmm7.lo.zx': ( 'uint', 'both', ),
1032 'xmm8.lo.zx': ( 'uint', 'both', ),
1033 'xmm9.lo.zx': ( 'uint', 'both', ),
1034 'xmm10.lo.zx': ( 'uint', 'both', ),
1035 'xmm11.lo.zx': ( 'uint', 'both', ),
1036 'xmm12.lo.zx': ( 'uint', 'both', ),
1037 'xmm13.lo.zx': ( 'uint', 'both', ),
1038 'xmm14.lo.zx': ( 'uint', 'both', ),
1039 'xmm15.lo.zx': ( 'uint', 'both', ),
1040 'xmm0.dw0': ( 'uint', 'both', ),
1041 'xmm1.dw0': ( 'uint', 'both', ),
1042 'xmm2.dw0': ( 'uint', 'both', ),
1043 'xmm3.dw0': ( 'uint', 'both', ),
1044 'xmm4.dw0': ( 'uint', 'both', ),
1045 'xmm5.dw0': ( 'uint', 'both', ),
1046 'xmm6.dw0': ( 'uint', 'both', ),
1047 'xmm7.dw0': ( 'uint', 'both', ),
1048 'xmm8.dw0': ( 'uint', 'both', ),
1049 'xmm9.dw0': ( 'uint', 'both', ),
1050 'xmm10.dw0': ( 'uint', 'both', ),
1051 'xmm11.dw0': ( 'uint', 'both', ),
1052 'xmm12.dw0': ( 'uint', 'both', ),
1053 'xmm13.dw0': ( 'uint', 'both', ),
1054 'xmm14.dw0': ( 'uint', 'both', ),
1055 'xmm15_dw0': ( 'uint', 'both', ),
1056 # AVX registers.
1057 'ymm0': ( 'uint', 'both', ),
1058 'ymm1': ( 'uint', 'both', ),
1059 'ymm2': ( 'uint', 'both', ),
1060 'ymm3': ( 'uint', 'both', ),
1061 'ymm4': ( 'uint', 'both', ),
1062 'ymm5': ( 'uint', 'both', ),
1063 'ymm6': ( 'uint', 'both', ),
1064 'ymm7': ( 'uint', 'both', ),
1065 'ymm8': ( 'uint', 'both', ),
1066 'ymm9': ( 'uint', 'both', ),
1067 'ymm10': ( 'uint', 'both', ),
1068 'ymm11': ( 'uint', 'both', ),
1069 'ymm12': ( 'uint', 'both', ),
1070 'ymm13': ( 'uint', 'both', ),
1071 'ymm14': ( 'uint', 'both', ),
1072 'ymm15': ( 'uint', 'both', ),
1073
1074 # Special ones.
1075 'value.xcpt': ( 'uint', 'output', ),
1076 };
1077
1078 def __init__(self, sField, sOp, sValue, sType):
1079 assert sField in self.kdFields;
1080 assert sOp in self.kasOperators;
1081 self.sField = sField;
1082 self.sOp = sOp;
1083 self.sValue = sValue;
1084 self.sType = sType;
1085 assert isinstance(sField, str);
1086 assert isinstance(sOp, str);
1087 assert isinstance(sType, str);
1088 assert isinstance(sValue, str);
1089
1090
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is made up of a variable name, a compare operator and a value.
    The class level tables below define what the constructor accepts.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]

    ## Selector variables and their valid values.
    ## Outer key: variable name.  Inner key: accepted value; the inner dictionary
    ## value is an identifier string associated with that variable/value pair.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':      'size_o16',
            'o32':      'size_o32',
            'o64':      'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':        'ring_0',
            '1':        'ring_1',
            '2':        'ring_2',
            '3':        'ring_3',
            '0..2':     'ring_0_thru_2',
            '1..3':     'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':       'code_64bit',
            '32':       'code_32bit',
            '16':       'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':     'mode_real',
            'prot':     'mode_prot',
            'long':     'mode_long',
            'v86':      'mode_v86',
            'smm':      'mode_smm',
            'vmx':      'mode_vmx',
            'svm':      'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':       'paging_on',
            'off':      'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':      'vendor_amd',
            'intel':    'vendor_intel',
            'via':      'vendor_via',
        },
    }

    ## Selector shorthand predicates.
    ## Each of these translates into a 'variable<op>value' expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Initializes the selector.

        Asserts that sVariable is a key of kdVariables, that sOp is one of
        kasCompareOps and that sValue is valid for the given variable.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue
1179
1180
1181class InstructionTest(object):
1182 """
1183 Instruction test.
1184 """
1185
1186 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1187 self.oInstr = oInstr; # type: InstructionTest
1188 self.aoInputs = []; # type: list(TestInOut)
1189 self.aoOutputs = []; # type: list(TestInOut)
1190 self.aoSelectors = []; # type: list(TestSelector)
1191
1192 def toString(self, fRepr = False):
1193 """
1194 Converts it to string representation.
1195 """
1196 asWords = [];
1197 if self.aoSelectors:
1198 for oSelector in self.aoSelectors:
1199 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1200 asWords.append('/');
1201
1202 for oModifier in self.aoInputs:
1203 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1204
1205 asWords.append('->');
1206
1207 for oModifier in self.aoOutputs:
1208 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1209
1210 if fRepr:
1211 return '<' + ' '.join(asWords) + '>';
1212 return ' '.join(asWords);
1213
1214 def __str__(self):
1215 """ Provide string represenation. """
1216 return self.toString(False);
1217
1218 def __repr__(self):
1219 """ Provide unambigious string representation. """
1220 return self.toString(True);
1221
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key).
    """

    ## First characters of the operand types that usesModRM() reports as ModR/M encoded.
    _ktModRmTypePrefixes = ('E', 'G', 'M')

    def __init__(self, sWhere, sType):
        """ Asserts that both values are known before storing them. """
        assert sWhere in g_kdOpLocations, sWhere
        assert sType in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        sFirstChar = self.sType[0]
        return sFirstChar in self._ktModRmTypePrefixes
1236
1237
1238
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything recorded about a single instruction: the core
    attributes (mnemonic, operands, opcode, flags, ...), the implementation
    attributes (statistics and function names, stub indicators), where in the
    source file it was found and a few raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile and iLine are stored as the source file name and the line at
        which the instruction was created; everything else starts out unset.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs.  Fields
        whose value is None are left out, as are the optional fields that
        are empty, False or still at their -1 default.  When fRepr is True
        the text is wrapped in angle brackets.
        """
        aasFields = []

        aasFields.append(['opcode', self.sOpcode])
        aasFields.append(['mnemonic', self.sMnemonic])
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)])
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])])
        aasFields.append(['encoding', self.sEncoding])
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())])
        aasFields.append(['disenum', self.sDisEnum])
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)])
        aasFields.append(['group', self.sGroup])
        if self.fUnused:        aasFields.append(['unused', 'True'])
        if self.fInvalid:       aasFields.append(['invalid', 'True'])
        aasFields.append(['invlstyle', self.sInvalidStyle])
        aasFields.append(['fltest', self.asFlTest])
        aasFields.append(['flmodify', self.asFlModify])
        aasFields.append(['flundef', self.asFlUndefined])
        aasFields.append(['flset', self.asFlSet])
        aasFields.append(['flclear', self.asFlClear])
        aasFields.append(['mincpu', self.sMinCpu])
        aasFields.append(['stats', self.sStats])
        aasFields.append(['sFunction', self.sFunction])
        if self.fStub:          aasFields.append(['fStub', 'True'])
        if self.fUdStub:        aasFields.append(['fUdStub', 'True'])
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)])
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)])
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)])

        # Only fields that have a value are included.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None])
        if fRepr:
            return '<' + sRet + '>'
        return sRet

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms:
            - '0xNN':   The byte value itself.
            - '/r':     The digit r shifted into bits 3 thru 5.
            - '11/r':   Same as '/r', but with the two top bits set (0xc0).
            - '!11/r':  Same as '/r', but with only the top bit set (0x80).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode)     # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16)

        # The /r form (a slash and a single digit, i.e. two characters):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics and
        then to its value via g_kdX86EFlagsConstants.  None and empty lists
        yield zero.
        """
        uRet = 0
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag]
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant]
        return uRet

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)
1412
1413
## List of all the instructions.
g_aoAllInstructions = []            # type: list(Instruction)

## Dictionary of all the instructions, keyed by statistics name (opstat).
g_dAllInstructionsByStat = {}       # type: dict(Instruction)

## Dictionary of instruction lists, keyed by function name (opfunction).
g_dAllInstructionsByFunction = {}   # type: dict(list(Instruction))

## List of the instructions tagged by oponlytest.
g_aoOnlyTestInstructions = []       # type: list(Instruction)
1425
## Instruction maps.
## Keyed by map name; the name is also passed as first argument to InstructionMap.
## The group maps give the leading opcode byte(s) and the selector picking the
## instruction within the group.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 at opcode 0x80 gets the same '/r' selector as the other group 1 maps.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
}
1485
1486
1487
class ParserException(Exception):
    """
    Parser exception.

    A plain Exception subclass carrying the error message.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
1492
1493
1494class SimpleParser(object):
1495 """
1496 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1497 """
1498
1499 ## @name Parser state.
1500 ## @{
1501 kiCode = 0;
1502 kiCommentMulti = 1;
1503 ## @}
1504
1505 def __init__(self, sSrcFile, asLines, sDefaultMap):
1506 self.sSrcFile = sSrcFile;
1507 self.asLines = asLines;
1508 self.iLine = 0;
1509 self.iState = self.kiCode;
1510 self.sComment = '';
1511 self.iCommentLine = 0;
1512 self.aoCurInstrs = [];
1513
1514 assert sDefaultMap in g_dInstructionMaps;
1515 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1516
1517 self.cTotalInstr = 0;
1518 self.cTotalStubs = 0;
1519 self.cTotalTagged = 0;
1520
1521 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1522 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1523 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1524 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1525 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1526 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1527 self.fDebug = True;
1528
1529 self.dTagHandlers = {
1530 '@opbrief': self.parseTagOpBrief,
1531 '@opdesc': self.parseTagOpDesc,
1532 '@opmnemonic': self.parseTagOpMnemonic,
1533 '@op1': self.parseTagOpOperandN,
1534 '@op2': self.parseTagOpOperandN,
1535 '@op3': self.parseTagOpOperandN,
1536 '@op4': self.parseTagOpOperandN,
1537 '@oppfx': self.parseTagOpPfx,
1538 '@opmaps': self.parseTagOpMaps,
1539 '@opcode': self.parseTagOpcode,
1540 '@opcodesub': self.parseTagOpcodeSub,
1541 '@openc': self.parseTagOpEnc,
1542 '@opfltest': self.parseTagOpEFlags,
1543 '@opflmodify': self.parseTagOpEFlags,
1544 '@opflundef': self.parseTagOpEFlags,
1545 '@opflset': self.parseTagOpEFlags,
1546 '@opflclear': self.parseTagOpEFlags,
1547 '@ophints': self.parseTagOpHints,
1548 '@opdisenum': self.parseTagOpDisEnum,
1549 '@opmincpu': self.parseTagOpMinCpu,
1550 '@opcpuid': self.parseTagOpCpuId,
1551 '@opgroup': self.parseTagOpGroup,
1552 '@opunused': self.parseTagOpUnusedInvalid,
1553 '@opinvalid': self.parseTagOpUnusedInvalid,
1554 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1555 '@optest': self.parseTagOpTest,
1556 '@optestign': self.parseTagOpTestIgnore,
1557 '@optestignore': self.parseTagOpTestIgnore,
1558 '@opcopytests': self.parseTagOpCopyTests,
1559 '@oponly': self.parseTagOpOnlyTest,
1560 '@oponlytest': self.parseTagOpOnlyTest,
1561 '@opxcpttype': self.parseTagOpXcptType,
1562 '@opstats': self.parseTagOpStats,
1563 '@opfunction': self.parseTagOpFunction,
1564 '@opdone': self.parseTagOpDone,
1565 };
1566 for i in range(48):
1567 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1568 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1569
1570 self.asErrors = [];
1571
1572 def raiseError(self, sMessage):
1573 """
1574 Raise error prefixed with the source and line number.
1575 """
1576 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1577
1578 def raiseCommentError(self, iLineInComment, sMessage):
1579 """
1580 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1581 """
1582 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1583
1584 def error(self, sMessage):
1585 """
1586 Adds an error.
1587 returns False;
1588 """
1589 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1590 return False;
1591
1592 def errorComment(self, iLineInComment, sMessage):
1593 """
1594 Adds a comment error.
1595 returns False;
1596 """
1597 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1598 return False;
1599
1600 def printErrors(self):
1601 """
1602 Print the errors to stderr.
1603 Returns number of errors.
1604 """
1605 if self.asErrors:
1606 sys.stderr.write(u''.join(self.asErrors));
1607 return len(self.asErrors);
1608
1609 def debug(self, sMessage):
1610 """
1611 For debugging.
1612 """
1613 if self.fDebug:
1614 print('debug: %s' % (sMessage,));
1615
1616
1617 def addInstruction(self, iLine = None):
1618 """
1619 Adds an instruction.
1620 """
1621 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1622 g_aoAllInstructions.append(oInstr);
1623 self.aoCurInstrs.append(oInstr);
1624 return oInstr;
1625
1626 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1627 """
1628 Derives the mnemonic and operands from a IEM stats base name like string.
1629 """
1630 if oInstr.sMnemonic is None:
1631 asWords = sStats.split('_');
1632 oInstr.sMnemonic = asWords[0].lower();
1633 if len(asWords) > 1 and not oInstr.aoOperands:
1634 for sType in asWords[1:]:
1635 if sType in g_kdOpTypes:
1636 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1637 else:
1638 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1639 return False;
1640 return True;
1641
1642 def doneInstructionOne(self, oInstr, iLine):
1643 """
1644 Complete the parsing by processing, validating and expanding raw inputs.
1645 """
1646 assert oInstr.iLineCompleted is None;
1647 oInstr.iLineCompleted = iLine;
1648
1649 #
1650 # Specified instructions.
1651 #
1652 if oInstr.cOpTags > 0:
1653 if oInstr.sStats is None:
1654 pass;
1655
1656 #
1657 # Unspecified legacy stuff. We generally only got a few things to go on here.
1658 # /** Opcode 0x0f 0x00 /0. */
1659 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1660 #
1661 else:
1662 #if oInstr.sRawOldOpcodes:
1663 #
1664 #if oInstr.sMnemonic:
1665 pass;
1666
1667 #
1668 # Common defaults.
1669 #
1670
1671 # Guess mnemonic and operands from stats if the former is missing.
1672 if oInstr.sMnemonic is None:
1673 if oInstr.sStats is not None:
1674 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1675 elif oInstr.sFunction is not None:
1676 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1677
1678 # Derive the disassembler op enum constant from the mnemonic.
1679 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1680 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1681
1682 # Derive the IEM statistics base name from mnemonic and operand types.
1683 if oInstr.sStats is None:
1684 if oInstr.sFunction is not None:
1685 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1686 elif oInstr.sMnemonic is not None:
1687 oInstr.sStats = oInstr.sMnemonic;
1688 for oOperand in oInstr.aoOperands:
1689 if oOperand.sType:
1690 oInstr.sStats += '_' + oOperand.sType;
1691
1692 # Derive the IEM function name from mnemonic and operand types.
1693 if oInstr.sFunction is None:
1694 if oInstr.sMnemonic is not None:
1695 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1696 for oOperand in oInstr.aoOperands:
1697 if oOperand.sType:
1698 oInstr.sFunction += '_' + oOperand.sType;
1699 elif oInstr.sStats:
1700 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1701
1702 # Derive encoding from operands.
1703 if oInstr.sEncoding is None:
1704 if not oInstr.aoOperands:
1705 if oInstr.fUnused and oInstr.sSubOpcode:
1706 oInstr.sEncoding = 'ModR/M';
1707 else:
1708 oInstr.sEncoding = 'fixed';
1709 elif oInstr.aoOperands[0].usesModRM():
1710 if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
1711 oInstr.sEncoding = 'ModR/M+VEX';
1712 else:
1713 oInstr.sEncoding = 'ModR/M';
1714
1715 #
1716 # Apply default map and then add the instruction to all it's groups.
1717 #
1718 if not oInstr.aoMaps:
1719 oInstr.aoMaps = [ self.oDefaultMap, ];
1720 for oMap in oInstr.aoMaps:
1721 oMap.aoInstructions.append(oInstr);
1722
1723 #
1724 # Check the opstat value and add it to the opstat indexed dictionary.
1725 #
1726 if oInstr.sStats:
1727 if oInstr.sStats not in g_dAllInstructionsByStat:
1728 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1729 else:
1730 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1731 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1732
1733 #
1734 # Add to function indexed dictionary. We allow multiple instructions per function.
1735 #
1736 if oInstr.sFunction:
1737 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1738 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1739 else:
1740 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1741
1742 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1743 return True;
1744
1745 def doneInstructions(self, iLineInComment = None):
1746 """
1747 Done with current instruction.
1748 """
1749 for oInstr in self.aoCurInstrs:
1750 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1751 if oInstr.fStub:
1752 self.cTotalStubs += 1;
1753
1754 self.cTotalInstr += len(self.aoCurInstrs);
1755
1756 self.sComment = '';
1757 self.aoCurInstrs = [];
1758 return True;
1759
1760 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
1761 """
1762 Sets the sAttrib of all current instruction to oValue. If fOverwrite
1763 is False, only None values and empty strings are replaced.
1764 """
1765 for oInstr in self.aoCurInstrs:
1766 if fOverwrite is not True:
1767 oOldValue = getattr(oInstr, sAttrib);
1768 if oOldValue is not None:
1769 continue;
1770 setattr(oInstr, sAttrib, oValue);
1771
1772 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
1773 """
1774 Sets the iEntry of the array sAttrib of all current instruction to oValue.
1775 If fOverwrite is False, only None values and empty strings are replaced.
1776 """
1777 for oInstr in self.aoCurInstrs:
1778 aoArray = getattr(oInstr, sAttrib);
1779 while len(aoArray) <= iEntry:
1780 aoArray.append(None);
1781 if fOverwrite is True or aoArray[iEntry] is None:
1782 aoArray[iEntry] = oValue;
1783
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line consists of the word 'Opcode' followed by a
    hex byte, the words after 'Opcode' are recorded (with any '0X' prefix
    normalized to '0x') as the 'sRawOldOpcodes' attribute of the current
    instructions via setInstrunctionAttrib().

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word and keep the opcode bytes and any
        # trailing /reg like specifiers.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1799
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    When there is no current instruction, one is added for line
    self.iCommentLine + iTagLine.  The op-tag count of every current
    instruction is bumped, and self.cTotalTagged is incremented for each
    instruction that just received its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCur in self.aoCurInstrs:
        fFirstTag = oCur.cOpTags == 0;
        oCur.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1809
1810 @staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section (a list of lines) becomes one string made of its
    stripped lines joined by single spaces; empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if asLines];
1821
1822 @staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    aasSections is a list of sections, each a list of lines.  The stripped
    lines of a section are joined with sLineSep and the non-empty sections
    are joined with sSectionSep.  Empty sections are skipped and never
    contribute a separator, neither leading nor trailing.

    Returns the flattened string (empty if there is nothing to flatten).
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if asLines]);
1839
1840
1841
1842 ## @name Tag parsers
1843 ## @{
1844
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The flattened value gets a terminating full stop if it lacks one, must
    not exceed 180 characters and must consist of a single sentence.  On
    success it is stored in the instruction's sBrief attribute, refusing to
    overwrite an existing brief.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first full stop that is followed by a space or ends the
    # text.  If that isn't the final character, there is more than one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1874
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the instruction's
    asDescSections list.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
1889
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The flattened value is checked against self.oReMnemonic and stored in
    the instruction's sMnemonic attribute unless one is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate before touching the instruction.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sMnemonic;
        return True;

    return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                       % (sTag, oInstr.sMnemonic, sMnemonic,));
1912
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against g_kdOpLocations.
    When omitted, it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It is
    validated against g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # Note: this complains about the type, not the location.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # Default location for this type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1962
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The value is flattened into a comma separated list.  Whitespace around
    the entries is ignored and so are empty entries (as produced by lines
    ending with a comma).  Each name must be a key of g_dInstructionMaps
    and must not already be assigned to the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries are filtered explicitly.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1997
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value is expected.  'n/a' is stored as None, 'none' is
    stored as is, and anything else must be an opcode byte (two character
    values get a '0x' prefix prepended).  Values other than 'n/a' must
    also be present in g_kdPrefixes.  An existing prefix is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Complain instead of raising IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2035
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are an opcode byte (as judged by _isValidOpcodeByte) or
    one of '/r', '11/r' and '!11/r' where r is a single digit in the range
    0 thru 7.  An existing opcode is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if not _isValidOpcodeByte(sOpcode):
        # Not a plain byte, so it must be a /reg form.  The reg value is
        # limited to 0..7 ('8' is not a valid value).
        if    sOpcode[:-1] not in ('/', '11/', '!11/') \
           or sOpcode[-1] not in '01234567':
            return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2065
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; the first element
    of the corresponding entry is what gets stored in the instruction's
    sSubOpcode attribute, unless one is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and look the value up.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Store it, refusing to overwrite.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2091
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The flattened value is accepted when it is a key of g_kdEncodings, a
    key of g_dInstructionMaps or an opcode byte according to
    _isValidOpcodeByte.  An existing encoding is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown =    sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Store it, refusing to overwrite.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
2118
## Maps each EFLAGS related op-tag to the name of the instruction attribute
## it sets (looked up by parseTagOpEFlags).
kdOpFlagToAttr = {
    '@opfltest':   'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef':  'asFlUndefined',
    '@opflset':    'asFlSet',
    '@opflclear':  'asFlClear',
};
2127
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  All invalid
    names are reported before giving up.  The resulting list is stored in
    the instruction attribute that self.kdOpFlagToAttr associates with the
    tag, unless that attribute has already been set.

    Returns True on success, False or the result of self.errorComment()
    on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;

    # Store them, refusing to overwrite.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2159
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any
    case) means no hints.  All invalid hints are reported before giving
    up.  Valid hints are added as keys to the instruction's dHints
    dictionary; a hint already present is reported but does not cause
    failure.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() without arguments returns already stripped words.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2193
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Exactly one word is expected.  If there are several, an error is
    reported and the first word is used anyway; with no word at all the
    method fails.  The word must match self.oReDisEnum and is stored in
    the instruction's sDisEnum attribute unless one is already set.

    Returns True on success, False or the result of self.errorComment()
    on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Store it, refusing to overwrite.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2222
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    Exactly one CPU name is expected.  If there are several, an error is
    reported and the first one is used; an empty value is an error.  The
    name must be in g_kdCpuNames.  It is stored in the instruction's
    sMinCpu attribute if that is still None; an attempt to replace it with
    a different name is reported and the existing value kept.

    Returns True when a valid name was given, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Complain instead of raising IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned during validation above, or the
    # overwrite check here can never trigger.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2252
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of names from
    g_kdCpuIdFlags, or the single word 'none' (any case) for no flags.
    All invalid names are reported before giving up.  Valid names are
    appended to the instruction's asCpuIds list; a name already present is
    reported but does not cause failure.

    Returns True on success, False if any name is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() without arguments returns already stripped words.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2286
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is expected.  It is
    stored in the instruction's sGroup attribute unless one is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Store it, refusing to overwrite.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
2312
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style from g_kdInvalidStyles is expected.  It is stored in
    the instruction's sInvalidStyle attribute unless one is already set,
    and for \@opunused / \@opinvalid the fUnused / fInvalid attribute is
    set to True as well.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Store it, refusing to overwrite.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2350
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  A section is
    split into words which are sorted into selectors, inputs and outputs
    using the '/' and '->' separator words.  Selectors are validated
    against TestSelector, inputs and outputs against TestInOut.  A test
    that parses without errors is appended to the instruction's aoTests
    list; otherwise the problems are reported via self.errorComment().

    Returns True (errors are only reported, not returned).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front: outputs first, then the
        # inputs (after '->') and finally the selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list is checked by
            # identity; comparing by value would treat any two equal lists
            # (e.g. two empty ones) as the same and miss repeated separators.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;      # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;      # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed okay, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2495
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is extracted from the tag and compared with the number of
    tests the instruction already has; a mismatch is reported.  The value
    is then parsed by parseTagOpTest(), whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Bracketed form: skip '@optest[' and the closing ']'.  Plain form:
    # skip '@optest'.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2511
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or recorded.

    See also \@oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2523
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName
    and is appended to the instruction's asCopyTests list.  Invalid and
    duplicate references are reported and skipped.

    Returns True unless the value is empty, in which case the result of
    self.errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if len(asToCopy) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2552
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is appended to
    g_aoOnlyTestInstructions unless it is already in that list.

    See also \@optestignore.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate and add instruction to only test dictionary.
    sValue = self.flattenAllSections(aasSections).strip();
    if not sValue:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));
2575
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type from g_kdXcptTypes is expected.  It is stored in the
    instruction's sXcptType attribute unless one is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Store it, refusing to overwrite.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sType,));
2602
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReFunctionName and is stored in
    the instruction's sFunction attribute unless one is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    # Store it, refusing to overwrite.
    if oInstr.sFunction is None:
        oInstr.sFunction = sFunction;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sFunction,));
2630
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReStatsName and is stored in the
    instruction's sStats attribute unless one is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sStats, self.oReStatsName.pattern));

    # Store it, refusing to overwrite.
    if oInstr.sStats is None:
        oInstr.sStats = sStats;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sStats,));
2658
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions() when the tag carries no
    value, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    # The tag must stand alone; anything after it is a mistake.
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2671
2672 ## @}
2673
2674
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment text is split into lines, decoration (leading whitespace and
    asterisks) is stripped, and the lines are then grouped into tags.  A tag
    starts at a line beginning with '@' and runs until the next such line;
    text before the first tag is collected under the pseudo tag '@default'.
    Within a tag, blank lines separate the text into sections.  Each tag
    found in self.dTagHandlers is dispatched to its handler as
    handler(sTag, aasSections, iTagLine, iEndLine).

    If it's an opcode specifying comment, we reset the macro stuff.

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore ignored, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each dropped leading line advances self.iCommentLine so that it keeps
    # pointing at the first line that is retained.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # Note: asLines cannot be empty here, the check at the top guarantees
    #       that at least one line contains 'Opcode' or '@', and neither is
    #       removed by the stripping above.
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0;                        # Number of tags that had a handler.
    sFlatDefault = None;                # Flattened untagged text, if any.
    sCurTag = '@default';               # Tag currently being collected.
    iCurTagLine = 0;                    # Index into asLines where sCurTag started.
    asCurSection = [];                  # Section currently receiving lines.
    aasSections = [ asCurSection, ];    # All sections of the current tag.
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: a non-empty line extends the
            # current section, an empty line after some text opens a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section left behind by a blank line.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                # The tag would have been valid with an 'op' prefix.
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                # NOTE(review): '@opencoding' starts with '@op' and is already
                # caught by the 'Unknown tag' branch above.
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The first word is the tag (lower-cased), the remainder of the
            # line, if any, is the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # iLine still holds the index of the last line from the loop above.  The
    # 'Did you mean' suggestions are not produced for the final tag.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2775
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name (optionally surrounded by
    whitespace) followed by the opening parenthesis.  If the closing
    parenthesis is not found in sInvocation, more text is pulled in from
    self.asLines, starting at index self.iLine.

    Returns a tuple: the first element is the offset following the macro
    invocation, the second is the list of macro arguments with the macro
    name as element zero.  Invalid input is reported via self.raiseError()
    or self.error().
    """
    # The macro name is everything in front of the first parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Walk the argument list character by character, tracking the nesting
    # level so that only top-level commas separate arguments.
    iNextLine = self.iLine;
    cNesting = 1;
    offCur = offParen + 1;
    offArgStart = offCur;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Out of text, so continue with the next source line.
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;

        chCur = sInvocation[offCur];
        if chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')'):
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
2817
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation if the next non-blank character after the name
    is an opening parenthesis.

    Returns (len(sCode), None) if not found.  If found, returns the
    parseMacroInvocation result with the offset adjusted so that it is
    relative to the start of sCode.
    """
    offHit = sCode.find(sMacro);
    # Use lstrip() + startswith() rather than indexing: the macro name may be
    # the last thing in sCode, leaving nothing to index into.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        # NOTE(review): this unpacking assumes parseMacroInvocation returns a
        # tuple; confirm what it returns on its self.error() paths.
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2827
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if sMacro is not invoked in sCode, otherwise the argument
    list as per parseMacroInvocation.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2833
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one that
    is invoked in sCode.

    Returns same as findAndParseMacroInvocation, i.e. None if none of the
    macros are invoked.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
2843
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against what has already been recorded
    for the last of the current instructions.  Attributes that are not set
    yet are filled in from the macro, mismatches are reported via self.error()
    and processing continues.

    Parameters:
        sMacro     - The macro name, used as prefix in error messages.
        sStats     - The a_Stats argument.
        sAsm       - The a_szMnemonic argument (currently unused).
        sForm      - The a_Form argument, a key in g_kdIemForms.
        sUpper     - The a_Upper argument.
        sLower     - The a_Lower argument.
        sDisHints  - The a_fDisHints argument: '0' or DISOPTYPE_XXX values
                     separated by '|'.
        sIemHints  - The a_fIemHints argument: '0' or IEMOPHINT_XXX values
                     separated by '|'.
        asOperands - The a_OpN arguments, each a key in g_kdOpTypes.

    Returns True.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands):  # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        sFormEncoding = g_kdIemForms[sForm][0];
        asFormWhere = g_kdIemForms[sForm][1];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = sFormEncoding;
        elif sFormEncoding != oInstr.sEncoding:
            # Report the encoding that was actually compared, not the whole
            # g_kdIemForms entry.
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, sFormEncoding, sForm));

        # Check the parameter locations for the encoding.
        if asFormWhere is not None:
            if len(asFormWhere) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint arguments are handled the same way, only the expected prefix
    # and the parameter name used in the error messages differ.
    for sParam, sPrefix, sHints in ( ('a_fDisHints', 'DISOPTYPE_', sDisHints),
                                     ('a_fIemHints', 'IEMOPHINT_', sIemHints), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True;  # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    return True;
2961
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros have no a_Stats and a_szMnemonic arguments, so both are
    derived from the lower case mnemonic and the operands before handing
    everything over to workerIemOpMnemonicEx.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the bare mnemonic serves as both.
        sStats = sLower;
        sAsm = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2971
def checkCodeForMacro(self, sCode):
    """
    Checks a piece of code for relevant macro invocations.

    Returns True if a FNIEMOP_XXX function definition/stub macro was found
    and processed, False otherwise (including when only IEMOP_MNEMONIC*
    macros were found).
    """
    # Without an opening parenthesis past the first column there cannot be
    # any macro invocation in here.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions.  ASSUME single line.
    #
    asFnArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                     [ 'FNIEMOP_DEF',
                                                       'FNIEMOP_STUB',
                                                       'FNIEMOP_STUB_1',
                                                       'FNIEMOP_UD_STUB',
                                                       'FNIEMOP_UD_STUB_1' ]);
    if asFnArgs is not None:
        sFunction = asFnArgs[1];
        sFnMacro = asFnArgs[0];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sFnMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fIsStub = sFnMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sFnMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            # Stub macros end the specification of the current instruction(s).
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,]
    #                     a_fDisHints, a_fIemHints)
    # The N operands sit between a_Lower and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[6 + cOperands], asArgs[7 + cOperands],
                                       asArgs[6 : 6 + cOperands]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[4 + cOperands], asArgs[5 + cOperands],
                                     asArgs[4 : 4 + cOperands]);

    return False;
3065
3066
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between a code
    state and a multi-line comment state.  Code is checked for relevant
    macro invocations via checkCodeForMacro(), and each completed comment
    is handed to parseComment().

    Returns the result of self.printErrors() (the number of errors).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine);  # only multiline comments for now.
                    if offHit >= 0:
                        # Code in front of the comment, then start collecting it.
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment = '';
                        self.iCommentLine = self.iLine;
                        self.iState = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # End of the comment: parse it and go back to code.
                        self.sComment += sLine[offLine:offHit];
                        self.iState = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than sLine[0] so an empty line string is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
3130
3131
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns what SimpleParser.parse() returns for the file.
    Raises Exception if the file cannot be opened or read; parser exceptions
    are passed on to the caller (a ParserException is printed first).
    """
    #
    # Load the whole file as a list of lines.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3162
3163
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each entry in an instruction's asCopyTests is looked up first in
    g_dAllInstructionsByStat and then in g_dAllInstructionsByFunction, and
    the tests of the instruction(s) found are appended to the referencing
    instruction.  Failures are written to stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;

        for sReference in oTarget.asCopyTests:
            # A statistics name identifies a single instruction, whereas a
            # function name may map to several.
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sReference, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3191
3192
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (the error count).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        # Only instructions that actually have tests need clearing.
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
3204
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files, which are expected to be
    located next to this script, and then applies the test copying and
    only-test post-processing steps.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted; exceptions from the file parsing are not caught here.
    """
    # (default instruction map, file name) pairs, parsed in this order.
    aasInputs = (
        ( 'one',   'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
    );
    sThisDir = os.path.dirname(os.path.abspath(__file__));

    cTotalErrors = 0;
    for sMap, sFilename in aasInputs:
        cTotalErrors += __parseFileByName(os.path.join(sThisDir, sFilename), sMap);
    cTotalErrors += __doTestCopying();
    cTotalErrors += __applyOnlyTest();

    if cTotalErrors == 0:
        return True;
    sys.exit(1);
    return True;
3225
3226
3227
# Do the parsing as a side effect of loading this module, i.e. both when it is
# imported and when it is run as a script.  Note that __parseAll() terminates
# the process via sys.exit(1) if any errors were counted.
__parseAll();
3229
3230
3231#
3232# Generators (may perhaps move later).
3233#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited in order of their
    encoding followed by their lead opcodes.  For a map, a 'const DISOPCODE'
    C array is generated with one OP()/OPVEX() line per instruction, followed
    by an AssertCompile on the table size, and the text is written to
    oDstFile.

    Note! Only the first map in sort order is generated for now, see the
          'break' at the end of the loop.

    oDstFile is the object to write the output to (only write() is used), it
    defaults to sys.stdout.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # The character offset each column of an instruction line is padded
        # out to, see the line formatting further down.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start each group of 16 entries with a comment giving the high
            # nibble, separating the groups by an empty line.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                # Nothing is emitted for empty table entries.
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Entries holding a list of instructions only get a placeholder.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # OP() takes up to three operands, OPVEX() is used for four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                # First column: macro name and the quoted format string made
                # up of the mnemonic and the operand formats from g_kdOpTypes
                # (element 2).  Formats not starting with '%' are upper-cased.
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the parser columns begin, so that the
                # number of columns added by this section can be calculated.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    # One '0,' column per operand.
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    # The encoding names another instruction map, use the
                    # parser index given by that map.
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Operands already covered by a column above are skipped.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the parser columns up to cMaxOperands entries.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the parameter columns up to cMaxOperands entries.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # OR together the defines of all hints that map to a
                # DISOPTYPE_XXX value, or use '0' if there are none.  This
                # column also closes the macro invocation.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its offset in aoffColumns; if
                # the line is already at or past that offset, a single space
                # is used as separator instead.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3397
# When run as a script, generate the disassembler tables, writing them to the
# default destination of generateDisassemblerTables (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3400
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette