VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66747

Last change on this file since 66747 was 66747, checked in by vboxsync, 8 years ago

IEM: Implemented movlps Mq,Vq (0x0f 0x13).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 141.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66747 2017-05-02 14:00:02Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66747 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: there is no separate 'long' type any more, so alias
# the name to 'int' for the code below that still calls long().
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS bit values, keyed by the X86_EFL_XXX constant name.
## Single-bit flags are written as shifts (bit number = shift count).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          1 << 0,
    'X86_EFL_1':           1 << 1,
    'X86_EFL_PF':          1 << 2,
    'X86_EFL_AF':          1 << 4,
    'X86_EFL_ZF':          1 << 6,
    'X86_EFL_SF':          1 << 7,
    'X86_EFL_TF':          1 << 8,
    'X86_EFL_IF':          1 << 9,
    'X86_EFL_DF':          1 << 10,
    'X86_EFL_OF':          1 << 11,
    'X86_EFL_IOPL':        (1 << 12) | (1 << 13),   # two-bit field
    'X86_EFL_NT':          1 << 14,
    'X86_EFL_RF':          1 << 16,
    'X86_EFL_VM':          1 << 17,
    'X86_EFL_AC':          1 << 18,
    'X86_EFL_VIF':         1 << 19,
    'X86_EFL_VIP':         1 << 20,
    'X86_EFL_ID':          1 << 21,
    'X86_EFL_LIVE_MASK':   0x003f7fd5,              # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    1 << 1,                  # same bit as X86_EFL_1
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a mnemonic to an X86_EFL_XXX constant name; a leading '!' marks the
## mnemonic that stands for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation, set / clear pairs (sorted by value):
    'cf':   'X86_EFL_CF',     'nc':  '!X86_EFL_CF',   ##< Carry Flag / No Carry.
    # NOTE(review): on x86 PF=1 signals even parity, yet 'po' (odd) is the
    #               "set" mnemonic here.  Mapping kept exactly as found - confirm.
    'po':   'X86_EFL_PF',     'pe':  '!X86_EFL_PF',   ##< Parity Odd / Parity Even.
    'af':   'X86_EFL_AF',     'na':  '!X86_EFL_AF',   ##< Aux Flag / No Aux.
    'zr':   'X86_EFL_ZF',     'nz':  '!X86_EFL_ZF',   ##< ZeRo / No Zero.
    'ng':   'X86_EFL_SF',     'pl':  '!X86_EFL_SF',   ##< NeGative / PLus (sign).
    'tf':   'X86_EFL_TF',                             ##< Trap flag.
    'ei':   'X86_EFL_IF',     'di':  '!X86_EFL_IF',   ##< Enabled / Disabled Interrupts.
    'dn':   'X86_EFL_DF',     'up':  '!X86_EFL_DF',   ##< DowN / UP (string op direction).
    'ov':   'X86_EFL_OF',     'nv':  '!X86_EFL_OF',   ##< OVerflow / No Overflow.

    # Debugger notation for flags without a "clear" mnemonic:
    'nt':   'X86_EFL_NT',                             ##< Nested Task.
    'rf':   'X86_EFL_RF',                             ##< Resume Flag.
    'vm':   'X86_EFL_VM',                             ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',                             ##< Alignment Check.
    'vif':  'X86_EFL_VIF',                            ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',                            ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0 (bit number = shift count).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':   1 << 0,
    'X86_CR0_MP':   1 << 1,
    'X86_CR0_EM':   1 << 2,
    'X86_CR0_TS':   1 << 3,
    'X86_CR0_ET':   1 << 4,
    'X86_CR0_NE':   1 << 5,
    'X86_CR0_WP':   1 << 16,
    'X86_CR0_AM':   1 << 18,
    'X86_CR0_NW':   1 << 29,
    'X86_CR0_CD':   1 << 30,
    'X86_CR0_PG':   1 << 31,
};
139
## Constants and values for CR4 (bit number = shift count).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
161
## XSAVE components (XCR0).  Single components are written as bit shifts.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
##
## Every location starts out with its own empty list as value.
g_kdOpLocations = {
    sLocation: []
    for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':       ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':       ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':       ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':       ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':      ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':      ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':      ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':      ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':      ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'WqZxReg':  ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':       ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':     ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),

    # ModR/M.rm - memory only.
    'Ma':       ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'MbRO':     ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'MdRO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'MdWO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':       ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MqWO':     ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MRO':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'MRW':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':       ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':       ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':       ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':      ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZxReg': ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':      ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZxReg': ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':      ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':      ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':       ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    # Was '%Vpd' / 'Vpd' (copy of the Vpd entry); now parallels 'Wdq' above.
    # NOTE(review): confirm the disassembler defines the matching Vdq parameter.
    'Vdq':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),

    # Immediate values.
    'Ib':       ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':       ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':       ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':       ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':       ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':       ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':       ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':       ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':       ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':       ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':       ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':       ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':       ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':       ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':       ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':       ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':      ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':       ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':       ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':       ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':       ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':       ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':       ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
287
288# IDX_ParseFixedReg
289# IDX_ParseVexDest
290
291
## IEMFORM_XXX mappings.
##
## Each form maps to ( sEncoding, [ sWhere1, ... ] ); 'FIXED' has None in
## place of the location list.  Every entry gets its own list object.
g_kdIemForms = dict(
    (sForm, (sEncoding, None if asWhere is None else list(asWhere),))
    for sForm, sEncoding, asWhere in (
        ( 'RM',         'ModR/M',     ( 'reg', 'rm', ), ),
        ( 'RM_REG',     'ModR/M',     ( 'reg', 'rm', ), ),
        ( 'RM_MEM',     'ModR/M',     ( 'reg', 'rm', ), ),
        ( 'MR',         'ModR/M',     ( 'rm', 'reg', ), ),
        ( 'MR_REG',     'ModR/M',     ( 'rm', 'reg', ), ),
        ( 'MR_MEM',     'ModR/M',     ( 'rm', 'reg', ), ),
        ( 'M',          'ModR/M',     ( 'rm', ), ),
        ( 'M_REG',      'ModR/M',     ( 'rm', ), ),
        ( 'M_MEM',      'ModR/M',     ( 'rm', ), ),
        ( 'R',          'ModR/M',     ( 'reg', ), ),

        ( 'VEX_RM',     'VEX.ModR/M', ( 'reg', 'rm', ), ),
        ( 'VEX_RM_REG', 'VEX.ModR/M', ( 'reg', 'rm', ), ),
        ( 'VEX_RM_MEM', 'VEX.ModR/M', ( 'reg', 'rm', ), ),
        ( 'VEX_MR',     'VEX.ModR/M', ( 'rm', 'reg', ), ),
        ( 'VEX_MR_REG', 'VEX.ModR/M', ( 'rm', 'reg', ), ),
        ( 'VEX_MR_MEM', 'VEX.ModR/M', ( 'rm', 'reg', ), ),
        ( 'VEX_M',      'VEX.ModR/M', ( 'rm', ), ),
        ( 'VEX_M_REG',  'VEX.ModR/M', ( 'rm', ), ),
        ( 'VEX_M_MEM',  'VEX.ModR/M', ( 'rm', ), ),
        ( 'VEX_R',      'VEX.ModR/M', ( 'reg', ), ),
        ( 'VEX_RVM',    'VEX.ModR/M', ( 'reg', 'vvvv', 'rm', ), ),
        ( 'VEX_MVR',    'VEX.ModR/M', ( 'rm', 'vvvv', 'reg', ), ),

        ( 'FIXED',      'fixed',      None, ),
    )
);
320
## \@oppfx values (each mapped to its own empty list).
g_kdPrefixes = { sPrefix: [] for sPrefix in ( 'none', '0x66', '0xf3', '0xf2', ) };
328
## Special \@opcode tag values (each mapped to its own empty list).
g_kdSpecialOpcodes = {
    sTag: []
    for sTag in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
338
## Special \@opcodesub tag values.
##
## Each tag maps to a one-element list holding the canonical tag; the short
## '11' and '!11' spellings are aliases for the 'mr/reg' forms.
g_kdSubOpcodes = dict(
    (sTag, [ sCanonical, ])
    for sTag, sCanonical in (
        ( 'none',        None,         ),
        ( '11 mr/reg',   '11 mr/reg',  ),
        ( '11',          '11 mr/reg',  ),   ##< alias
        ( '!11 mr/reg',  '!11 mr/reg', ),
        ( '!11',         '!11 mr/reg', ),   ##< alias
    )
);
347
## Valid values for \@openc
##
## Each encoding maps to a one-element list with a BS3CG1ENC_XXX name, or
## None for 'prefix'.
g_kdEncodings = dict(
    (sEncoding, [ sBs3Cg1Enc, ])
    for sEncoding, sBs3Cg1Enc in (
        ( 'ModR/M',      'BS3CG1ENC_MODRM',     ),  ##< ModR/M
        ( 'VEX.ModR/M',  'BS3CG1ENC_VEX_MODRM', ),  ##< VEX...ModR/M
        ( 'fixed',       'BS3CG1ENC_FIXED',     ),  ##< Fixed encoding (address, registers, etc).
        ( 'prefix',      None,                  ),  ##< Prefix
    )
);
355
## \@opunused, \@opinvalid, \@opinvlstyle
##
## Each style is mapped to its own empty list.
g_kdInvalidStyles = {
    sStyle: []
    for sStyle in (
        'immediate',                ##< CPU stops decoding immediately after the opcode.
        'intel-modrm',              ##< Intel decodes ModR/M.
        'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
        'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
    )
};
364
## Known CPU names, each mapped to an empty tuple.
g_kdCpuNames = dict.fromkeys(( '8086', '80186', '80286', '80386', '80486', ), ());
372
## \@opcpuid
##
## Maps the feature name used in the instruction documentation to the name of
## the corresponding X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme':         'X86_CPUID_FEATURE_EDX_VME',
    'tsc':         'X86_CPUID_FEATURE_EDX_TSC',
    'msr':         'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':         'X86_CPUID_FEATURE_EDX_CX8',
    'sep':         'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':        'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':       'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':  'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':         'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':        'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':         'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':        'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':        'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the constant of the adjacent CPUID.1:ECX
    # bit (DTES64, CPLDS and TM2 respectively), i.e. a different feature.
    # NOTE(review): confirm these constant names against the x86 header.
    'pclmul':      'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':     'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':         'X86_CPUID_FEATURE_ECX_VMX',
    'smx':         'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':       'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':         'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':        'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':        'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':       'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':      'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':         'X86_CPUID_FEATURE_ECX_AES',
    'xsave':       'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':         'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':        'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':      'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':       'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':       'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':         'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':        'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':         'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':       'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':    'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':         'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':        'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
417
## \@ophints values.
##
## Each hint maps to the DISOPTYPE_XXX constant whose suffix is the upper-cased
## hint name, except for the two hints listed first in the condition below,
## which map to an empty string.
g_kdHints = {
    sHint: ('' if sHint in ( 'ignores_op_size', 'lock_allowed', ) else 'DISOPTYPE_' + sHint.upper())
    for sHint in (
        'invalid',
        'harmless',
        'controlflow',
        'potentially_dangerous',
        'dangerous',
        'portio',
        'privileged',
        'privileged_notrap',
        'uncond_controlflow',
        'relative_controlflow',
        'cond_controlflow',
        'interrupt',
        'illegal',
        'rrm_dangerous',            ##< Some additional dangerous ones when recompiling raw r0.
        'rrm_dangerous_16',         ##< Some additional dangerous ones when recompiling 16-bit raw r0.
        'inhibit_irqs',             ##< Will or can inhibit irqs (sti, pop ss, mov ss)
        'portio_read',
        'portio_write',
        'invalid_64',               ##< Invalid in 64 bits mode
        'only_64',                  ##< Only valid in 64 bits mode
        'default_64_op_size',       ##< Default 64 bits operand size
        'forced_64_op_size',        ##< Forced 64 bits operand size; regardless of prefix bytes
        'rexb_extends_opreg',       ##< REX.B extends the register field in the opcode byte
        'mod_fixed_11',             ##< modrm.mod is always 11b
        'forced_32_op_size_x86',    ##< Forced 32 bits operand size; regardless of prefix bytes
                                    ##  (only in 16 & 32 bits mode!)
        'sse',                      ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
        'mmx',                      ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
        'fpu',                      ##< FPU instruction. Not implemented yet!
        'ignores_op_size',          ##< Ignores both operand size prefixes.
        'lock_allowed',             ##< Lock prefix allowed.
    )
};
452
## \@opxcpttype values (see SDMv2 2.4, 2.7).
##
## Each exception type is mapped to its own empty list.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1', '2', '3', '4', '4UA', '5', '6', '7', '8', '11', '12',
        'E1', 'E1NF',
        'E2',
        'E3', 'E3NF',
        'E4', 'E4NF',
        'E5', 'E5NF',
        'E6', 'E6NF',
        'E7NF',
        'E9', 'E9NF',
        'E10',
        'E11',
        'E12', 'E12NF',
    )
};
486
487
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode is an opcode byte spelled as '0x' followed by
    exactly two lower case hexadecimal digits (e.g. '0x0f').
    Returns True/False.
    """
    sHexDigits = '0123456789abcdef';
    return (    len(sOpcode) == 4
            and sOpcode[:2] == '0x'
            and sOpcode[2] in sHexDigits
            and sOpcode[3] in sHexDigits);
499
500
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    kdEncodings = {
        'legacy':   [],
        'vex1':     [],     ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [],     ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [],     ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [],     ##< XOP prefix with vvvvv = 8
        'xop9':     [],     ##< XOP prefix with vvvvv = 9
        'xop10':    [],     ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':      [ 256, ],  ##< next opcode byte selects the instruction (default).
        '/r':        [   8, ],  ##< modrm.reg selects the instruction.
        'memreg /r': [  16, ],  ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':    [  32, ],  ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':    [   8, ],  ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':     [   8, ],  ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':        [  64, ],  ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case hex strings (None means no lead bytes), sSelector a key of
        kdSelectors, sEncoding a key of kdEncodings and sDisParse either None
        or a string starting with 'IDX_Parse'.  Invalid input trips an assertion.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list of Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the map
        selector.  The '11' and '!11' selectors additionally assert that the
        mod field (top two bits, mask 0xc0) matches what the selector promises.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The masked value can only be 0x00, 0x40, 0x80 or 0xc0, so the
        #       mod == 0y11 checks below must compare against 0xc0 (not 0xc).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions (those without an opcode are skipped).
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    aoTable[idxOpcode] = [oExisting, oInstr];   # second hit: promote to a list
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_'-separated words of sName:
        'm', 'r' and two-digit hex words are kept as '_<word>', other words
        get 'grp'/'map' capitalized and their first letter upper-cased.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
641
642
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number string,
        optionally preceded by '+' or '-' (which forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the least
        significant byte first in the bytearray.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() handles arbitrarily large
        # values on both python 2 and 3, so no 'long' is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string.  '%x' yields the bare digits (no '0x'
        # prefix, no 'L' suffix).  Negative values needs to be manually
        # converted to something non-negative (~-n + 1), which is done by
        # inverting each digit of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;  # make sure the top bit is set

        # Pick the smallest normal size that fits, or else a multiple of the largest one.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad to the natural size: zeros for non-negative values, 'f' for negative ones.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base type.
        """
        _ = sValue;
        return False;
767
768
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that stand for a flag being clear.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the TestType.get() result for the flags to set when no
        "clear" mnemonic is present, otherwise an (aoClear, aoSet) pair where
        the first entry holds the flags to clear and the second the flags to
        set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the "clear" mnemonics, in which case
        get() returns an AND+OR pair.

        Whole comma separated flags are compared (the same split get() uses),
        so that e.g. 'iopl' isn't mistaken for containing 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
804
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a dictionary of
    constants (used for CR0, CR4 and XCR0 style values).

    A value is a comma separated list of flag names; each name is upper-cased
    and prefixed with sConstantPrefix before being looked up in the
    dictionary, and the resulting values are OR'ed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts the flag list to the TestType.get() form of the combined value.
        Raises BadValue on the first unknown flag.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
825
826
827class TestInOut(object):
828 """
829 One input or output state modifier.
830
831 This should be thought as values to modify BS3REGCTX and extended (needs
832 to be structured) state.
833 """
834 ## Assigned operators.
835 kasOperators = [
836 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
837 '&~=',
838 '&=',
839 '|=',
840 '='
841 ];
842 ## Types
843 kdTypes = {
844 'uint': TestType('uint', fUnsigned = True),
845 'int': TestType('int'),
846 'efl': TestTypeEflags('efl'),
847 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
848 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
849 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
850 };
851 ## CPU context fields.
852 kdFields = {
853 # name: ( default type, [both|input|output], )
854 # Operands.
855 'op1': ( 'uint', 'both', ), ## \@op1
856 'op2': ( 'uint', 'both', ), ## \@op2
857 'op3': ( 'uint', 'both', ), ## \@op3
858 'op4': ( 'uint', 'both', ), ## \@op4
859 # Flags.
860 'efl': ( 'efl', 'both', ),
861 'efl_undef': ( 'uint', 'output', ),
862 # 8-bit GPRs.
863 'al': ( 'uint', 'both', ),
864 'cl': ( 'uint', 'both', ),
865 'dl': ( 'uint', 'both', ),
866 'bl': ( 'uint', 'both', ),
867 'ah': ( 'uint', 'both', ),
868 'ch': ( 'uint', 'both', ),
869 'dh': ( 'uint', 'both', ),
870 'bh': ( 'uint', 'both', ),
871 'r8l': ( 'uint', 'both', ),
872 'r9l': ( 'uint', 'both', ),
873 'r10l': ( 'uint', 'both', ),
874 'r11l': ( 'uint', 'both', ),
875 'r12l': ( 'uint', 'both', ),
876 'r13l': ( 'uint', 'both', ),
877 'r14l': ( 'uint', 'both', ),
878 'r15l': ( 'uint', 'both', ),
879 # 16-bit GPRs.
880 'ax': ( 'uint', 'both', ),
881 'dx': ( 'uint', 'both', ),
882 'cx': ( 'uint', 'both', ),
883 'bx': ( 'uint', 'both', ),
884 'sp': ( 'uint', 'both', ),
885 'bp': ( 'uint', 'both', ),
886 'si': ( 'uint', 'both', ),
887 'di': ( 'uint', 'both', ),
888 'r8w': ( 'uint', 'both', ),
889 'r9w': ( 'uint', 'both', ),
890 'r10w': ( 'uint', 'both', ),
891 'r11w': ( 'uint', 'both', ),
892 'r12w': ( 'uint', 'both', ),
893 'r13w': ( 'uint', 'both', ),
894 'r14w': ( 'uint', 'both', ),
895 'r15w': ( 'uint', 'both', ),
896 # 32-bit GPRs.
897 'eax': ( 'uint', 'both', ),
898 'edx': ( 'uint', 'both', ),
899 'ecx': ( 'uint', 'both', ),
900 'ebx': ( 'uint', 'both', ),
901 'esp': ( 'uint', 'both', ),
902 'ebp': ( 'uint', 'both', ),
903 'esi': ( 'uint', 'both', ),
904 'edi': ( 'uint', 'both', ),
905 'r8d': ( 'uint', 'both', ),
906 'r9d': ( 'uint', 'both', ),
907 'r10d': ( 'uint', 'both', ),
908 'r11d': ( 'uint', 'both', ),
909 'r12d': ( 'uint', 'both', ),
910 'r13d': ( 'uint', 'both', ),
911 'r14d': ( 'uint', 'both', ),
912 'r15d': ( 'uint', 'both', ),
913 # 64-bit GPRs.
914 'rax': ( 'uint', 'both', ),
915 'rdx': ( 'uint', 'both', ),
916 'rcx': ( 'uint', 'both', ),
917 'rbx': ( 'uint', 'both', ),
918 'rsp': ( 'uint', 'both', ),
919 'rbp': ( 'uint', 'both', ),
920 'rsi': ( 'uint', 'both', ),
921 'rdi': ( 'uint', 'both', ),
922 'r8': ( 'uint', 'both', ),
923 'r9': ( 'uint', 'both', ),
924 'r10': ( 'uint', 'both', ),
925 'r11': ( 'uint', 'both', ),
926 'r12': ( 'uint', 'both', ),
927 'r13': ( 'uint', 'both', ),
928 'r14': ( 'uint', 'both', ),
929 'r15': ( 'uint', 'both', ),
930 # 16-bit, 32-bit or 64-bit registers according to operand size.
931 'oz.rax': ( 'uint', 'both', ),
932 'oz.rdx': ( 'uint', 'both', ),
933 'oz.rcx': ( 'uint', 'both', ),
934 'oz.rbx': ( 'uint', 'both', ),
935 'oz.rsp': ( 'uint', 'both', ),
936 'oz.rbp': ( 'uint', 'both', ),
937 'oz.rsi': ( 'uint', 'both', ),
938 'oz.rdi': ( 'uint', 'both', ),
939 'oz.r8': ( 'uint', 'both', ),
940 'oz.r9': ( 'uint', 'both', ),
941 'oz.r10': ( 'uint', 'both', ),
942 'oz.r11': ( 'uint', 'both', ),
943 'oz.r12': ( 'uint', 'both', ),
944 'oz.r13': ( 'uint', 'both', ),
945 'oz.r14': ( 'uint', 'both', ),
946 'oz.r15': ( 'uint', 'both', ),
947 # Control registers.
948 'cr0': ( 'cr0', 'both', ),
949 'cr4': ( 'cr4', 'both', ),
950 'xcr0': ( 'xcr0', 'both', ),
951 # FPU Registers
952 'fcw': ( 'uint', 'both', ),
953 'fsw': ( 'uint', 'both', ),
954 'ftw': ( 'uint', 'both', ),
955 'fop': ( 'uint', 'both', ),
956 'fpuip': ( 'uint', 'both', ),
957 'fpucs': ( 'uint', 'both', ),
958 'fpudp': ( 'uint', 'both', ),
959 'fpuds': ( 'uint', 'both', ),
960 'mxcsr': ( 'uint', 'both', ),
961 'st0': ( 'uint', 'both', ),
962 'st1': ( 'uint', 'both', ),
963 'st2': ( 'uint', 'both', ),
964 'st3': ( 'uint', 'both', ),
965 'st4': ( 'uint', 'both', ),
966 'st5': ( 'uint', 'both', ),
967 'st6': ( 'uint', 'both', ),
968 'st7': ( 'uint', 'both', ),
969 # MMX registers.
970 'mm0': ( 'uint', 'both', ),
971 'mm1': ( 'uint', 'both', ),
972 'mm2': ( 'uint', 'both', ),
973 'mm3': ( 'uint', 'both', ),
974 'mm4': ( 'uint', 'both', ),
975 'mm5': ( 'uint', 'both', ),
976 'mm6': ( 'uint', 'both', ),
977 'mm7': ( 'uint', 'both', ),
978 # SSE registers.
979 'xmm0': ( 'uint', 'both', ),
980 'xmm1': ( 'uint', 'both', ),
981 'xmm2': ( 'uint', 'both', ),
982 'xmm3': ( 'uint', 'both', ),
983 'xmm4': ( 'uint', 'both', ),
984 'xmm5': ( 'uint', 'both', ),
985 'xmm6': ( 'uint', 'both', ),
986 'xmm7': ( 'uint', 'both', ),
987 'xmm8': ( 'uint', 'both', ),
988 'xmm9': ( 'uint', 'both', ),
989 'xmm10': ( 'uint', 'both', ),
990 'xmm11': ( 'uint', 'both', ),
991 'xmm12': ( 'uint', 'both', ),
992 'xmm13': ( 'uint', 'both', ),
993 'xmm14': ( 'uint', 'both', ),
994 'xmm15': ( 'uint', 'both', ),
995 'xmm0.lo': ( 'uint', 'both', ),
996 'xmm1.lo': ( 'uint', 'both', ),
997 'xmm2.lo': ( 'uint', 'both', ),
998 'xmm3.lo': ( 'uint', 'both', ),
999 'xmm4.lo': ( 'uint', 'both', ),
1000 'xmm5.lo': ( 'uint', 'both', ),
1001 'xmm6.lo': ( 'uint', 'both', ),
1002 'xmm7.lo': ( 'uint', 'both', ),
1003 'xmm8.lo': ( 'uint', 'both', ),
1004 'xmm9.lo': ( 'uint', 'both', ),
1005 'xmm10.lo': ( 'uint', 'both', ),
1006 'xmm11.lo': ( 'uint', 'both', ),
1007 'xmm12.lo': ( 'uint', 'both', ),
1008 'xmm13.lo': ( 'uint', 'both', ),
1009 'xmm14.lo': ( 'uint', 'both', ),
1010 'xmm15.lo': ( 'uint', 'both', ),
1011 'xmm0.hi': ( 'uint', 'both', ),
1012 'xmm1.hi': ( 'uint', 'both', ),
1013 'xmm2.hi': ( 'uint', 'both', ),
1014 'xmm3.hi': ( 'uint', 'both', ),
1015 'xmm4.hi': ( 'uint', 'both', ),
1016 'xmm5.hi': ( 'uint', 'both', ),
1017 'xmm6.hi': ( 'uint', 'both', ),
1018 'xmm7.hi': ( 'uint', 'both', ),
1019 'xmm8.hi': ( 'uint', 'both', ),
1020 'xmm9.hi': ( 'uint', 'both', ),
1021 'xmm10.hi': ( 'uint', 'both', ),
1022 'xmm11.hi': ( 'uint', 'both', ),
1023 'xmm12.hi': ( 'uint', 'both', ),
1024 'xmm13.hi': ( 'uint', 'both', ),
1025 'xmm14.hi': ( 'uint', 'both', ),
1026 'xmm15.hi': ( 'uint', 'both', ),
1027 'xmm0.lo.zx': ( 'uint', 'both', ),
1028 'xmm1.lo.zx': ( 'uint', 'both', ),
1029 'xmm2.lo.zx': ( 'uint', 'both', ),
1030 'xmm3.lo.zx': ( 'uint', 'both', ),
1031 'xmm4.lo.zx': ( 'uint', 'both', ),
1032 'xmm5.lo.zx': ( 'uint', 'both', ),
1033 'xmm6.lo.zx': ( 'uint', 'both', ),
1034 'xmm7.lo.zx': ( 'uint', 'both', ),
1035 'xmm8.lo.zx': ( 'uint', 'both', ),
1036 'xmm9.lo.zx': ( 'uint', 'both', ),
1037 'xmm10.lo.zx': ( 'uint', 'both', ),
1038 'xmm11.lo.zx': ( 'uint', 'both', ),
1039 'xmm12.lo.zx': ( 'uint', 'both', ),
1040 'xmm13.lo.zx': ( 'uint', 'both', ),
1041 'xmm14.lo.zx': ( 'uint', 'both', ),
1042 'xmm15.lo.zx': ( 'uint', 'both', ),
1043 'xmm0.dw0': ( 'uint', 'both', ),
1044 'xmm1.dw0': ( 'uint', 'both', ),
1045 'xmm2.dw0': ( 'uint', 'both', ),
1046 'xmm3.dw0': ( 'uint', 'both', ),
1047 'xmm4.dw0': ( 'uint', 'both', ),
1048 'xmm5.dw0': ( 'uint', 'both', ),
1049 'xmm6.dw0': ( 'uint', 'both', ),
1050 'xmm7.dw0': ( 'uint', 'both', ),
1051 'xmm8.dw0': ( 'uint', 'both', ),
1052 'xmm9.dw0': ( 'uint', 'both', ),
1053 'xmm10.dw0': ( 'uint', 'both', ),
1054 'xmm11.dw0': ( 'uint', 'both', ),
1055 'xmm12.dw0': ( 'uint', 'both', ),
1056 'xmm13.dw0': ( 'uint', 'both', ),
1057 'xmm14.dw0': ( 'uint', 'both', ),
1058 'xmm15_dw0': ( 'uint', 'both', ),
1059 # AVX registers.
1060 'ymm0': ( 'uint', 'both', ),
1061 'ymm1': ( 'uint', 'both', ),
1062 'ymm2': ( 'uint', 'both', ),
1063 'ymm3': ( 'uint', 'both', ),
1064 'ymm4': ( 'uint', 'both', ),
1065 'ymm5': ( 'uint', 'both', ),
1066 'ymm6': ( 'uint', 'both', ),
1067 'ymm7': ( 'uint', 'both', ),
1068 'ymm8': ( 'uint', 'both', ),
1069 'ymm9': ( 'uint', 'both', ),
1070 'ymm10': ( 'uint', 'both', ),
1071 'ymm11': ( 'uint', 'both', ),
1072 'ymm12': ( 'uint', 'both', ),
1073 'ymm13': ( 'uint', 'both', ),
1074 'ymm14': ( 'uint', 'both', ),
1075 'ymm15': ( 'uint', 'both', ),
1076
1077 # Special ones.
1078 'value.xcpt': ( 'uint', 'output', ),
1079 };
1080
def __init__(self, sField, sOp, sValue, sType):
    """
    Sets up one field / operator / value / type item.

    sField must be a key in the class' kdFields table and sOp must be found
    in its kasOperators collection.  All four arguments must be strings.
    Violations are reported by assertion.
    """
    # Table membership is verified before anything is stored on the object.
    assert sField in self.kdFields;
    assert sOp in self.kasOperators;

    self.sField, self.sOp, self.sValue, self.sType = sField, sOp, sValue, sType;

    # Type checks are done last, in this order: field, operator, type, value.
    for oArg in (sField, sOp, sType, sValue):
        assert isinstance(oArg, str);
1092
1093
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.  The class
    level tables below define what is accepted for each of the three.
    """

    ## The compare operators a selector may use.
    kasCompareOps = [ '==', '!=' ];

    ## The selector variables.  Each one maps its valid values to a name.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # CPU modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # Paging on/off.
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
        # CPU vendor.
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };

    ## Shorthand predicates.
    ## Each of these translates into a '<variable><op><value>' expression.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.

        sVariable must be a key in kdVariables, sOp one of kasCompareOps and
        sValue a valid value for sVariable.  Violations are reported by
        assertion.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1182
1183
1184class InstructionTest(object):
1185 """
1186 Instruction test.
1187 """
1188
1189 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1190 self.oInstr = oInstr; # type: InstructionTest
1191 self.aoInputs = []; # type: list(TestInOut)
1192 self.aoOutputs = []; # type: list(TestInOut)
1193 self.aoSelectors = []; # type: list(TestSelector)
1194
1195 def toString(self, fRepr = False):
1196 """
1197 Converts it to string representation.
1198 """
1199 asWords = [];
1200 if self.aoSelectors:
1201 for oSelector in self.aoSelectors:
1202 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1203 asWords.append('/');
1204
1205 for oModifier in self.aoInputs:
1206 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1207
1208 asWords.append('->');
1209
1210 for oModifier in self.aoOutputs:
1211 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1212
1213 if fRepr:
1214 return '<' + ' '.join(asWords) + '>';
1215 return ' '.join(asWords);
1216
1217 def __str__(self):
1218 """ Provide string represenation. """
1219 return self.toString(False);
1220
1221 def __repr__(self):
1222 """ Provide unambigious string representation. """
1223 return self.toString(True);
1224
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        """ Creates the operand, asserting that both values are known keys. """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided by the first character of the operand type alone.
        chKind = self.sType[0];
        return chKind == 'E' or chKind == 'G' or chKind == 'M';
1239
1240
1241
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain record of everything collected about one instruction, together
    with a few helpers for formatting it, decoding the opcode byte spec and
    converting the EFLAGS lists into integer masks.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile and iLine are stored as sSrcFile and iLineCreated; all other
        attributes start out as None, empty, False, zero or -1.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs.  Pairs
        whose value is None are left out, and several optional ones are only
        included when set.  The whole thing is wrapped in angle brackets when
        fRepr is set.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Joining the pairs gives the separators without having to look at
        # how long the string built so far is.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms:
            - '0xNN':   The value of the hex number.
            - '/r':     The digit shifted into bits 5:3.
            - '11/r':   Same as '/r' with 0xc0 or'ed in.
            - '!11/r':  Same as '/r' with 0x80 or'ed in.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form:
        # A slash followed by a single digit is two characters long, so that
        # is the length to check for here.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics and
        the g_kdX86EFlagsConstants value of that name is or'ed into the
        result.  None and empty lists yield zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);
1415
1416
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Indexed by map name, which is also the first InstructionMap argument.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Note: 0x80 is selected by the ModR/M reg field just like the other
    #       group 1 opcodes (0x81 thru 0x83), so it gets the same selector.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1489
1490
1491
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1496
1497
1498class SimpleParser(object):
1499 """
1500 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1501 """
1502
1503 ## @name Parser state.
1504 ## @{
1505 kiCode = 0;
1506 kiCommentMulti = 1;
1507 ## @}
1508
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse
    and sDefaultMap the name of the g_dInstructionMaps entry to use as the
    default map (asserted to exist).
    """
    # Input and scanning state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The default instruction map.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Regular expressions for checking various names.
    self.oReMacroName    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic     = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName = re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName    = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum      = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug          = True;

    # Tag name to handler method table.
    self.dTagHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags come in two spellings each: @optestN and @optest[N].
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Error messages collected by error() and errorComment().
    self.asErrors = [];
1575
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1581
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,));
1587
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1595
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the line number
    reported is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,));
    return False;
1603
def printErrors(self):
    """
    Writes the collected errors to stderr, if there are any.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1612
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1619
1620
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global instruction
    list and the list of current instructions.

    The instruction's creation line is iLine, or self.iLine when iLine is
    None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oInstr);
    return oInstr;
1629
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string, i.e. underscore separated words: <mnemonic>[_<optype>[...]].

    Nothing is done when the instruction already has a mnemonic.  Operands
    are only derived when the instruction doesn't have any yet.

    Returns False if a word isn't a known operand type (operands derived
    before that word are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    if len(asParts) > 1 and not oInstr.aoOperands:
        for sOpType in asParts[1:]:
            # Unknown operand types quietly abort the derivation, no error is recorded.
            if sOpType not in g_kdOpTypes:
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1645
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in the mnemonic, disassembler
    enum, stats name, function name and encoding where these are still unset,
    applies the default map if no maps were given, and registers the
    instruction with its maps and the global stats/function dictionaries.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    def operandSuffix():
        """ Returns '_<type>' for each operand with a non-empty type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    #
    # Note! Tagged instructions (cOpTags > 0) and unspecified legacy ones
    #       (like: /** Opcode 0x0f 0x00 /0. */ FNIEMOPRM_DEF(iemOp_Grp6_sldt))
    #       currently get the same treatment: just the common defaults below.
    #

    # Guess mnemonic and operands from the stats name, or failing that the
    # function name, if the mnemonic is missing.
    if oInstr.sMnemonic is None:
        sBaseName = oInstr.sStats;
        if sBaseName is None and oInstr.sFunction is not None:
            sBaseName = oInstr.sFunction.replace('iemOp_', '');
        if sBaseName is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sBaseName);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name, preferring the function name over
    # mnemonic + operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandSuffix();

    # Derive the IEM function name, preferring mnemonic + operand types over
    # the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.  Note that it is left unset when there
    # are operands and the first one does not use ModR/M.
    if oInstr.sEncoding is None:
        if oInstr.aoOperands:
            if oInstr.aoOperands[0].usesModRM():
                fVex = len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv';
                oInstr.sEncoding = 'ModR/M+VEX' if fVex else 'ModR/M';
        elif oInstr.fUnused and oInstr.sSubOpcode:
            oInstr.sEncoding = 'ModR/M';
        else:
            oInstr.sEncoding = 'fixed';

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # Duplicates are reported as errors and the first instruction is kept.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
1748
def doneInstructions(self, iLineInComment = None):
    """
    Done with the current instruction(s).

    Completes each of them via doneInstructionOne, updates the stub and
    instruction totals, and resets the comment text and the list of current
    instructions.  The completion line is self.iLine, or
    self.iCommentLine + iLineInComment when the latter is given.

    Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh.
    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1763
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instructions to oValue.

    Unless fOverwrite is True (the boolean itself), only attributes that are
    currently None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1775
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    Arrays that are too short are padded with None entries first.  Unless
    fOverwrite is True (the boolean itself), only entries that are currently
    None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1787
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line starts with the word 'Opcode' followed by a word with
    a '0x' or '0X' prefix, the words after 'Opcode' are stored as the
    sRawOldOpcodes attribute of the current instructions (without
    overwriting), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' keyword, keeping the opcode bytes and whatever follows them.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                # Replace the prefix only, the hex digits are kept as they are.
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
1803
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    When no instruction is pending, one is added for line
    self.iCommentLine + iTagLine.  The tag counter of every current
    instruction is bumped, and self.cTotalTagged is incremented for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    aoInstrs = self.aoCurInstrs;
    if not aoInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);
        aoInstrs = self.aoCurInstrs;    # Re-read; addInstruction updates it.

    for oCurInstr in aoInstrs:
        cTagsBefore = oCurInstr.cOpTags;
        oCurInstr.cOpTags = cTagsBefore + 1;
        if cTagsBefore == 0:
            self.cTotalTagged += 1;
    return aoInstrs[-1];
1813
1814 @staticmethod
1815 def flattenSections(aasSections):
1816 """
1817 Flattens multiline sections into stripped single strings.
1818 Returns list of strings, on section per string.
1819 """
1820 asRet = [];
1821 for asLines in aasSections:
1822 if asLines:
1823 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1824 return asRet;
1825
1826 @staticmethod
1827 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1828 """
1829 Flattens sections into a simple stripped string with newlines as
1830 section breaks. The final section does not sport a trailing newline.
1831 """
1832 # Typical: One section with a single line.
1833 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1834 return aasSections[0][0].strip();
1835
1836 sRet = '';
1837 for iSection, asLines in enumerate(aasSections):
1838 if asLines:
1839 if iSection > 0:
1840 sRet += sSectionSep;
1841 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1842 return sRet;
1843
1844
1845
1846 ## @name Tag parsers
1847 ## @{
1848
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opbrief
    Value:      Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened into a single string and a terminating period is
    appended when missing.  It is rejected when empty, longer than 180
    characters or made up of more than one sentence.  On success it is
    stored as the sBrief attribute of the instruction; an existing brief is
    never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first period that ends a sentence, i.e. one followed by a
    # space or sitting at the very end.  It must be the final character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1878
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdesc
    Value:      Text description, multiple sections, appended.

    Describes the instruction.  Each non-empty section is flattened to one
    string and appended to the asDescSections list of the instruction.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    asFlattened = self.flattenSections(aasSections);
    oInstr.asDescSections.extend(asFlattened);
    return True;
1893
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmnemonic
    Value:      mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Only set it once.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is None:
        oInstr.sMnemonic = sNewMnemonic;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                       % (sTag, sOldMnemonic, sNewMnemonic,));
1916
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@op1, \@op2, \@op3, \@op4
    Value:      [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against g_kdOpLocations.
    When omitted, it is taken from entry [1] of the g_kdOpTypes record for
    the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It is
    validated against g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # Default location for this type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1966
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The value is split on commas; surrounding whitespace is stripped from
    each name and empty entries are ignored.  Every name must be a key of
    g_dInstructionMaps.  Assigning a map the instruction already has is an
    error.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split never
    # returns an empty list, so blank entries must be filtered out before
    # the 'value required' check can trigger.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag value; report and carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2001
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oppfx
    Value:      n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value is expected.  It is lower-cased; 'n/a' is stored as
    None, 'none' is kept as is, and anything else must be a valid opcode
    byte (a two character value gets a '0x' prefix first).  Values other
    than None must be present in g_kdPrefixes.  A previously set prefix is
    never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;       # Bare hex byte, add the prefix.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2039
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcode
    Value:      0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are a valid opcode byte (as judged by
    _isValidOpcodeByte), '/N', '11/N' and '!11/N', where N is a single digit
    in the range 0 thru 7.  A previously set opcode is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  The /reg digit is limited to 0..7
    # since the ModR/M reg field is three bits wide.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '01234567':
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2069
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcodesub
    Value:      none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; entry [0] of the matching
    record is what gets stored.  A previously set value is never
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look it up.
    sValue = self.flattenAllSections(aasSections);
    if sValue in g_kdSubOpcodes:
        sNormalized = g_kdSubOpcodes[sValue][0];

        # Only set it once.
        sPrevious = oInstr.sSubOpcode;
        if sPrevious is None:
            oInstr.sSubOpcode = sNormalized;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sPrevious, sNormalized,));

    return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
2095
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or a valid opcode byte (checked in that order).
    A previously set encoding is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown =    sNewEncoding in g_kdEncodings \
             or sNewEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Only set it once.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;
    return True;
2122
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags looks up the tag here to find which instruction attribute
## receives the parsed list of flags.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
2131
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:      <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  The resulting
    list is stored in the instruction attribute given by
    self.kdOpFlagToAttr[sTag], which must still be None.

    Returns True on success, False or the result of self.errorComment()
    on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value, treating 'none' as the empty list.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];

    # Validate the flags, tolerating surrounding whitespace.
    fOkay = True;
    for iFlag, sFlag in enumerate(asFlags):
        if sFlag in g_kdEFlagsMnemonics:
            continue;
        sStripped = sFlag.strip();
        if sStripped in g_kdEFlagsMnemonics:
            asFlags[iFlag] = sStripped;
        else:
            fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
    if not fOkay:
        return False;

    # Store them, but only once per tag.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
2163
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    gives an empty list.  Valid hints are added as keys to the dHints
    dictionary of the instruction.  A hint that is already present is
    reported via self.errorComment() without failing the tag.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never leaves whitespace in the
    #       words, so no additional stripping is required.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2197
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    Exactly one word is expected.  Extra words are reported but parsing
    continues with the first one, which must match self.oReDisEnum.  A
    previously set value is never overwritten.

    Returns True on success, False or the result of self.errorComment()
    on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    # Validate the first word.
    sNewDisEnum = asWords[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Only set it once.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sNewDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewDisEnum,));
2226
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    One CPU name from g_kdCpuNames is expected.  Extra words are reported
    but parsing continues with the first one.  The name is stored in the
    sMinCpu attribute of the instruction unless one is already set; a
    conflicting earlier value is reported and kept.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The value must not be assigned before this check, or a
    # conflicting earlier value would go unnoticed.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2256
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys from
    g_kdCpuIdFlags; the single word 'none' (any case) gives an empty list.
    Valid entries are appended to the asCpuIds list of the instruction.
    An entry that is already present is reported via self.errorComment()
    without failing the tag.

    Returns True on success, False if any entry is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never leaves whitespace in the
    #       words, so no additional stripping is required.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2290
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is expected.  A previously
    set group is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on a single, well-formed name.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));
    (sNewGroup,) = asWords;
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Only set it once.
    if oInstr.sGroup is None:
        oInstr.sGroup = sNewGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));
2316
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one word from g_kdInvalidStyles is expected, and only one of
    these tags may be given per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on a single known style.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    (sNewStyle,) = asWords;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Only set it once.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the three tags additionally raise a flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2354
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as one test.  The words are sorted (from the end
    towards the start) into outputs, inputs and selectors, switching list at
    the "->" and "/" separators.  Selectors and input/output assignments are
    then validated against the TestSelector and TestInOut tables.  A test is
    only appended to the aoTests list of the instruction if it parsed
    without errors.

    Always returns True; problems are reported via self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.
            # Note! The current list must be checked by identity; the lists
            #       compare equal by value whenever they are both empty.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional and defaults to that of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2499
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is extracted from the tag and compared with the number of
    tests the instruction already has.  A mismatch is reported via
    self.errorComment(), after which the value is parsed like a plain
    \@optest in either case.

    Returns the result of self.parseTagOpTest().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits follow '@optest' directly or sit between square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2515
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or recorded; the method just returns True.

    See also \@oponlytest.
    """
    # All the arguments are deliberately unused.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2527
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the asCopyTests list of the instruction.  Invalid and
    duplicate references are reported via self.errorComment() and skipped.

    Returns True when at least one reference word was given, otherwise the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2556
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to
    g_aoOnlyTestInstructions unless it is already in that list.

    See also \@optestignore.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag must stand alone.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction once.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
2579
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word from g_kdXcptTypes is expected.  A previously set type
    is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and insist on a single known type.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));
    (sNewType,) = asWords;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s  (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Only set it once.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
2606
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The value must match self.oReFunctionName.  A previously set function
    name is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the function name pattern.
    sNewFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewFunction, self.oReFunctionName.pattern));

    # Only set it once.
    if oInstr.sFunction is None:
        oInstr.sFunction = sNewFunction;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sNewFunction,));
2634
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The value must match self.oReStatsName.  A previously set statistics
    name is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the statistics name pattern.
    sNewStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Only set it once.
    if oInstr.sStats is None:
        oInstr.sStats = sNewStats;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sNewStats,));
2662
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions specified so far.

    Returns the doneInstructions() result, or the errorComment() result
    if the tag was given a value.
    """
    _ = iEndLine;

    # The tag takes no value, so the flattened text must be empty.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2675
2676 ## @}
2677
2678
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, an old style 'Opcode ...' first line
    is handed to parseCommentOldOpcode(), and the text is then divided into
    tags (lines starting with '@') which are dispatched via
    self.dTagHandlers.  Text preceding the first tag is collected under the
    pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    is done in that case), otherwise True.  Problems are reported via
    errorComment().
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if     self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;     # Keep the comment start line in sync with asLines[0].
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Processes one completed tag.

        Used for the tags completed inside the loop as well as for the
        final one, so both get the same handling and diagnostics.

        Returns (fOpTag, sFlatDefault): fOpTag is True if a tag handler was
        invoked, sFlatDefault is the flattened text for '@default' and None
        for everything else.
        """
        # Drop a trailing empty section (left behind by a blank line).
        if len(aasTagSections) > 1 and not aasTagSections[-1]:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (True, None);

        if sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (False, self.flattenAllSections(aasTagSections));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        return (False, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text belonging to the current tag; an empty line starts a new section.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            fOpTag, sFlat = processTag(sCurTag, aasSections, iCurTagLine, iLine);
            if fOpTag:
                cOpTags += 1;
            if sCurTag == '@default':
                sFlatDefault = sFlat;

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.  (iLine is the index of the last line here.)
    #
    fOpTag, sFlat = processTag(sCurTag, aasSections, iCurTagLine, iLine);
    if fOpTag:
        cOpTags += 1;
    if sCurTag == '@default':
        sFlatDefault = sFlat;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2779
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the closing parenthesis is
    not found in it, following lines from self.asLines (starting at
    self.iLine) are appended as needed; self.iLine itself is not modified.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    On a malformed invocation (invalid macro name, or the end of the file
    is reached before the closing parenthesis) the problem is reported via
    self.error() and (len(sInvocation), None) is returned, so callers can
    always unpack the result.  A missing open parenthesis is reported via
    self.raiseError().
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;               # Parenthesis nesting depth; 1 = directly inside the macro's own.
    off      = offOpen + 1;
    offStart = off;             # Start offset of the argument currently being scanned.
    while cDepth > 0:
        # Ran out of text: pull in following lines (loop in case one is empty).
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (len(sInvocation), None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the top level terminate an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2821
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is
    followed (optionally after whitespace) by an open parenthesis, parses
    the invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Note: startswith() rather than [0] so that a macro name at the very end
    #       of sCode is simply a non-match instead of an IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2831
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[1];
    return asArgs;
2837
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    for which an invocation is found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further macros are tried after a hit.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    for asArgs in oCandidates:
        if asArgs is not None:
            return asArgs;
    return None;
2847
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked and merged into the last instruction in
    self.aoCurInstrs (one is added if the list is empty).  Values already set
    on the instruction must agree with the macro, otherwise an error is
    reported.

    sMacro      The macro name, used as prefix for the error messages.
    sStats      The a_Stats argument.
    sAsm        The a_szMnemonic argument (currently unused).
    sForm       The a_Form argument, looked up in g_kdIemForms.
    sUpper      The a_Upper argument.
    sLower      The a_Lower argument.
    sDisHints   The a_fDisHints argument ('|' separated DISOPTYPE_XXX or 0).
    sIemHints   The a_fIemHints argument ('|' separated IEMOPHINT_XXX or 0).
    asOperands  List with the a_OpN arguments.

    Returns True.  Problems are reported via self.error() and processing
    continues.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            # Not given by @opN, so add it.
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        sFormEncoding = g_kdIemForms[sForm][0];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = sFormEncoding;
        elif sFormEncoding != oInstr.sEncoding:
            # Report the encoding name that was compared, not the whole table entry.
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, sFormEncoding, sForm));

        # Check the parameter locations for the encoding.
        asFormWheres = g_kdIemForms[sForm][1];
        if asFormWheres is not None:
            if len(asFormWheres) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints go first, then the IEM ones; both are handled alike.
    for sPrefix, sParam, sHints in ( ('DISOPTYPE_', 'a_fDisHints', sDisHints),
                                     ('IEMOPHINT_', 'a_fIemHints', sIemHints), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    return True;
2965
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros have no a_Stats and a_szMnemonic arguments, so both are
    composed from sLower and asOperands before handing the work over to
    workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the lower case mnemonic serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2975
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if a FNIEMOP_XXX function definition/stub macro was found
    and processed, otherwise False (also when IEMOP_MNEMONIC* macros were
    processed).
    """
    # Nothing to scan unless there is an open parenthesis past the first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        # A stub has no body, so the instructions are complete at this point.
        if asArgs[0].find('STUB') > 0:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    # The position in the tuple is the operand count N.
    #
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                        'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX',)):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is not None:
            offHints = 6 + cOperands;   # The two hint arguments follow the operands.
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[offHints], asArgs[offHints + 1], asArgs[6:offHints]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    #
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                        'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4',)):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is not None:
            offHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[offHints], asArgs[offHints + 1], asArgs[4:offHints]);

    return False;
3069
3070
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between the code
    state (self.kiCode) and the multi-line comment state
    (self.kiCommentMulti).  Code is passed to checkCodeForMacro(), the text
    of each completed /* ... */ comment is passed to parseComment() via
    self.sComment.

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;   # Note: self.iLine is now the 1-based number of sLine.

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the scanning path unless the first slash starts a C++ line
            # comment while we are in code.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of comment: collect the remainder, switch back to code and parse it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                # Only the code in front of the '//' is checked.
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # End of file: complete whatever is still pending and report.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3136
3137
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read; a
    ParserException from the parser is printed and passed on.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:     # Closes the file whether reading works out or not.
        try:
            asFileLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Parser exceptions are printed before being passed on,
    # everything else just propagates.
    #
    try:
        return SimpleParser(sSrcFile, asFileLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3168
3169
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry of an instruction is looked up as a statistics
    name first and as a function name second, and the tests of the
    instruction(s) found are appended to the referencing instruction.

    Returns the number of errors (written to stderr).
    """
    asProblems = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name hit yields one instruction, a function name hit a list.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSources:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3197
3198
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (there is nothing that can fail here).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        # Only instructions that have tests and are not singled out lose them.
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3210
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  When all
    of them have been parsed, the test copying and only-test filtering steps
    are run.

    Returns True if no errors were counted, otherwise the process is
    terminated with exit code 1 (sys.exit).
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default map name, source file name) pairs, parsed in this order.
    atSources = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );

    cErrors = 0;
    for sDefaultMap, sName in atSources:
        sSrcFile = os.path.join(sSrcDir, sName);
        cErrors += __parseFileByName(sSrcFile, sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
3237
3238
3239
# Parse all the instruction source files now.  NOTE: this call is unconditional,
# so it runs when the module is merely imported as well as when it is executed
# as a script, and __parseAll() terminates the process via sys.exit(1) if any
# errors were counted.
__parseAll();
3241
3242
3243#
3244# Generators (may perhaps move later).
3245#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    oDstFile    Object with a write() method receiving the generated text.
                Note that the default is bound to the sys.stdout object in
                effect when this function is defined.

    The instruction maps in g_dInstructionMaps are visited sorted by
    encoding plus lead opcodes.  Only the first map is currently emitted
    (see the break at the end of the loop).  No return value.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        # Table header.
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Offsets the columns of an entry are padded out to (see the formatting at the end of the loop).
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start a new group, marked by a comment, for every 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                # No instruction for this slot: nothing is emitted at present.
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Several instructions share the slot: only a placeholder is emitted.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # The OP macro covers up to three operands, OPVEX is used for four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    # A space precedes the first operand, commas separate the rest.
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                iStart = len(asColumns);    # Index of the first decoder column.
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    # One zero decoder entry per operand.
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    # The encoding names another instruction map: use that map's parser.
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (The operands skipped are as many as decoder columns were added above.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns with zeros up to cMaxOperands.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)     # Index of the first operand parameter column.
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the parameter columns with OP_PARM_NONE up to cMaxOperands.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints mapping to DISOPTYPE_XXX defines are included, or'ed together;
                # this column also closes the macro invocation.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # A column is padded out to its offset in aoffColumns; if the line is
                # already at or past that offset a single space is used instead.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        # Table footer with a compile time check of the table size.
        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3409
# When executed as a script, write the disassembler tables to the default
# destination of generateDisassemblerTables (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3412
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette