VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66808

Last change on this file since 66808 was 66808, checked in by vboxsync, 8 years ago

IEM: Implemented movq Vq,Wq (f3 0f 73)

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 141.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66808 2017-05-05 12:06:21Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66808 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names and the EFLAGS bit masks they stand for.
## Single bit flags are written as shifts so the bit number is visible.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,    # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Each mnemonic maps to the name of an X86_EFL_XXX constant.  A leading '!'
## on the constant name marks the mnemonic that stands for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): PF is conventionally *set* for even parity, so 'po' naming
    #               the set flag and 'pe' the clear one looks swapped - confirm
    #               against the users of these mnemonics before changing it.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## X86_CR0_XXX constant names and the CR0 bit masks they stand for.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
139
## X86_CR4_XXX constant names and the CR4 bit masks they stand for.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
161
## XSAVE_C_XXX component names (XCR0) and the bit masks they stand for.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    # Combined masks: YMM + ZMM_HI256 + ZMM_16HI, optionally with SSE.
    'XSAVE_C_ALL_AVX':     (1 << 2) | (1 << 6) | (1 << 7),              # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),   # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
##
## The keys are the location names; they are what the second field of the
## g_kdOpTypes entries and the location lists in g_kdIemForms refer to.
## Every value is an empty list in this table.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
## \@op[1-4] types
##
## Maps the operand type name to a 4-tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':       ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':       ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':       ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':       ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':      ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':      ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':      ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':      ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':      ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':       ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg':  ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':       ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':     ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),

    # ModR/M.rm - memory only.
    'Ma':       ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'MbRO':     ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'MdRO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'MdWO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':       ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MqWO':     ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MRO':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'MRW':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':       ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':       ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':       ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':      ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZxReg': ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':      ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZxReg': ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':      ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':      ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':       ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':     ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZxReg':  ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),

    # Immediate values.
    'Ib':       ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':       ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':       ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':       ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':       ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':       ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':       ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':       ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':       ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':       ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':       ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':       ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':       ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':       ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register.
    'Fv':       ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':       ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':      ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':       ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':       ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':       ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':       ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':       ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':       ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
290
291# IDX_ParseFixedReg
292# IDX_ParseVexDest
293
294
## IEMFORM_XXX mappings.
##
## Maps the form name (the XXX part) to a pair: the encoding name (one of the
## g_kdEncodings keys) and the list of operand locations in operand order
## (g_kdOpLocations keys), or None instead of a list for the fixed form.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
323
## \@oppfx values.
##
## The keys are the accepted values; every value is an empty list here.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
331
## Special \@opcode tag values.
##
## The keys are the accepted special (non hex byte) opcode forms; every value
## is an empty list here.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
341
## Special \@opcodesub tag values.
##
## Each value is a one element list.  The short '11' and '!11' keys carry the
## same element as their long 'mr/reg' forms (i.e. they are aliases), and
## 'none' carries None.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
350
## Valid values for \@openc
##
## Each value is a one element list holding a BS3CG1ENC_XXX constant name, or
## None for the prefix encoding.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
358
## \@opunused, \@opinvalid, \@opinvlstyle
##
## The keys are the accepted invalid-instruction decoding styles; every value
## is an empty list here.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
367
## Known CPU names.
##
## The keys are the names; every value is an empty tuple here.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
375
## \@opcpuid
##
## Maps the feature name accepted by the tag to the name of the CPUID feature
## bit constant (X86_CPUID_XXX).
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' used to name the constants of
##               neighbouring CPUID.1:ECX bits (DTES64, CPLDS and TM2).  The
##               replacement names below follow the naming pattern of the
##               other entries - confirm they match the defines in iprt/x86.h.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
420
## \@ophints values.
##
## Maps the hint name to the name of a DISOPTYPE_XXX constant.  The last two
## hints map to an empty string instead of a constant name.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
455
## \@opxcpttype values (see SDMv2 2.4, 2.7).
##
## The keys are the accepted exception type names; every value is an empty
## list here.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
489
490
491def _isValidOpcodeByte(sOpcode):
492 """
493 Checks if sOpcode is a valid lower case opcode byte.
494 Returns true/false.
495 """
496 if len(sOpcode) == 4:
497 if sOpcode[:2] == '0x':
498 if sOpcode[2] in '0123456789abcdef':
499 if sOpcode[3] in '0123456789abcdef':
500 return True;
501 return False;
502
503
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys are used by this class.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 1
        'vex2':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 2
        'vex3':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 3
        'xop8':     [], ##< XOP prefix with map select (mmmmm) = 8
        'xop9':     [], ##< XOP prefix with map select (mmmmm) = 9
        'xop10':    [], ##< XOP prefix with map select (mmmmm) = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name (getDisasTableName derives the table name from
        it), asLeadOpcodes the lead opcode bytes as '0x??' strings (None means
        no lead bytes), sSelector a kdSelectors key, sEncoding a kdEncodings
        key, and sDisParse either None or a name starting with 'IDX_Parse'.

        The arguments are checked with assertions only.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               ##< The instructions in this map (appended by the users of the class).
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  The value is treated as a ModR/M style byte
        (mod in bits 7:6, reg in bits 5:3, rm in bits 2:0) by all selectors
        except 'byte'.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The mod field is bits 7:6, so mod == 0y11 means the masked value
        #       is 0xc0.  (Comparing with 0xc can never match anything.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.

        Instructions without an opcode (empty/None sOpcode) are left out.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this entry, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of the map
        name: 'm', 'r' and two digit lower case hex words are appended with a
        leading underscore, other words get 'grp' and 'map' capitalized as
        well as their first character.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing rather than indexing so empty words (e.g. from '__') are harmless.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;
644
645
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the ascending list of natural value
        sizes in bytes (defaults to 1, 2, 4, 8, 16 and 32), and fUnsigned
        whether values are zero extended unless they carry a sign prefix.
        """
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number, optionally
        preceded by '+' or '-' (either one forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class only produces the first form.  The bytearray holds the least
        significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  (int() returns a long when needed on
        # python 2, so no python version specific code is required here.)
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string without any prefix or suffix.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1),
        # which is done by inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;  # Make sure the top bit is set.

        # Figure the natural size: the smallest normal size that fits, or a
        # multiple of the largest normal size if none of them does.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad it to the natural size, using 'f' to keep negative values negative.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Convert to bytearray, walking the digit pairs from the end, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
770
771
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of the flag mnemonics found in
    g_kdEFlagsMnemonics.
    """

    ## The mnemonics standing for a flag being clear (the ones mapping to a
    ## '!' prefixed constant in g_kdEFlagsMnemonics).  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into value tuples (see TestType.get).

        Returns a single entry with the flags to set when no "flag clear"
        mnemonic is present.  Otherwise an AND+OR pair is returned, where the
        first entry holds the mask of the flags to clear and the second the
        flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether it contains at least one
        "flag clear" mnemonic.

        The flags are compared as whole list items.  Searching the string for
        the mnemonics would mistake 'iopl' for containing 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
807
class TestTypeFromDict(TestType):
    """
    Value parsing for registers whose bits are named by a dictionary of
    constants (the file uses it for CR0, CR4 and XCR0).

    The value is a comma separated list of flag names.  Each name is upper
    cased and prefixed with sConstantPrefix before it is looked up in the
    dictionary, and the resulting values are ORed together.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;        ##< What to put in front of the upper cased flag names.
        self.kdConstantsAndValues = kdConstantsAndValues;   ##< Constant name to value dictionary.

    def get(self, sValue):
        """
        Converts the flag list into a value tuple (see TestType.get).

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;

        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
828
829
830class TestInOut(object):
831 """
832 One input or output state modifier.
833
834 This should be thought as values to modify BS3REGCTX and extended (needs
835 to be structured) state.
836 """
837 ## Assigned operators.
838 kasOperators = [
839 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
840 '&~=',
841 '&=',
842 '|=',
843 '='
844 ];
845 ## Types
846 kdTypes = {
847 'uint': TestType('uint', fUnsigned = True),
848 'int': TestType('int'),
849 'efl': TestTypeEflags('efl'),
850 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
851 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
852 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
853 };
## CPU context fields.
## Maps every field name that may appear in a test input/output specification
## to a tuple of ( default value type, direction ), where the direction is one
## of 'both', 'input' or 'output'.
kdFields = {
    # name:         ( default type, [both|input|output], )
    # Operands.
    'op1':          ( 'uint', 'both',   ), ## \@op1
    'op2':          ( 'uint', 'both',   ), ## \@op2
    'op3':          ( 'uint', 'both',   ), ## \@op3
    'op4':          ( 'uint', 'both',   ), ## \@op4
    # Flags.
    'efl':          ( 'efl',  'both',   ),
    'efl_undef':    ( 'uint', 'output', ),
    # 8-bit GPRs.
    'al':           ( 'uint', 'both',   ),
    'cl':           ( 'uint', 'both',   ),
    'dl':           ( 'uint', 'both',   ),
    'bl':           ( 'uint', 'both',   ),
    'ah':           ( 'uint', 'both',   ),
    'ch':           ( 'uint', 'both',   ),
    'dh':           ( 'uint', 'both',   ),
    'bh':           ( 'uint', 'both',   ),
    'r8l':          ( 'uint', 'both',   ),
    'r9l':          ( 'uint', 'both',   ),
    'r10l':         ( 'uint', 'both',   ),
    'r11l':         ( 'uint', 'both',   ),
    'r12l':         ( 'uint', 'both',   ),
    'r13l':         ( 'uint', 'both',   ),
    'r14l':         ( 'uint', 'both',   ),
    'r15l':         ( 'uint', 'both',   ),
    # 16-bit GPRs.
    'ax':           ( 'uint', 'both',   ),
    'dx':           ( 'uint', 'both',   ),
    'cx':           ( 'uint', 'both',   ),
    'bx':           ( 'uint', 'both',   ),
    'sp':           ( 'uint', 'both',   ),
    'bp':           ( 'uint', 'both',   ),
    'si':           ( 'uint', 'both',   ),
    'di':           ( 'uint', 'both',   ),
    'r8w':          ( 'uint', 'both',   ),
    'r9w':          ( 'uint', 'both',   ),
    'r10w':         ( 'uint', 'both',   ),
    'r11w':         ( 'uint', 'both',   ),
    'r12w':         ( 'uint', 'both',   ),
    'r13w':         ( 'uint', 'both',   ),
    'r14w':         ( 'uint', 'both',   ),
    'r15w':         ( 'uint', 'both',   ),
    # 32-bit GPRs.
    'eax':          ( 'uint', 'both',   ),
    'edx':          ( 'uint', 'both',   ),
    'ecx':          ( 'uint', 'both',   ),
    'ebx':          ( 'uint', 'both',   ),
    'esp':          ( 'uint', 'both',   ),
    'ebp':          ( 'uint', 'both',   ),
    'esi':          ( 'uint', 'both',   ),
    'edi':          ( 'uint', 'both',   ),
    'r8d':          ( 'uint', 'both',   ),
    'r9d':          ( 'uint', 'both',   ),
    'r10d':         ( 'uint', 'both',   ),
    'r11d':         ( 'uint', 'both',   ),
    'r12d':         ( 'uint', 'both',   ),
    'r13d':         ( 'uint', 'both',   ),
    'r14d':         ( 'uint', 'both',   ),
    'r15d':         ( 'uint', 'both',   ),
    # 64-bit GPRs.
    'rax':          ( 'uint', 'both',   ),
    'rdx':          ( 'uint', 'both',   ),
    'rcx':          ( 'uint', 'both',   ),
    'rbx':          ( 'uint', 'both',   ),
    'rsp':          ( 'uint', 'both',   ),
    'rbp':          ( 'uint', 'both',   ),
    'rsi':          ( 'uint', 'both',   ),
    'rdi':          ( 'uint', 'both',   ),
    'r8':           ( 'uint', 'both',   ),
    'r9':           ( 'uint', 'both',   ),
    'r10':          ( 'uint', 'both',   ),
    'r11':          ( 'uint', 'both',   ),
    'r12':          ( 'uint', 'both',   ),
    'r13':          ( 'uint', 'both',   ),
    'r14':          ( 'uint', 'both',   ),
    'r15':          ( 'uint', 'both',   ),
    # 16-bit, 32-bit or 64-bit registers according to operand size.
    'oz.rax':       ( 'uint', 'both',   ),
    'oz.rdx':       ( 'uint', 'both',   ),
    'oz.rcx':       ( 'uint', 'both',   ),
    'oz.rbx':       ( 'uint', 'both',   ),
    'oz.rsp':       ( 'uint', 'both',   ),
    'oz.rbp':       ( 'uint', 'both',   ),
    'oz.rsi':       ( 'uint', 'both',   ),
    'oz.rdi':       ( 'uint', 'both',   ),
    'oz.r8':        ( 'uint', 'both',   ),
    'oz.r9':        ( 'uint', 'both',   ),
    'oz.r10':       ( 'uint', 'both',   ),
    'oz.r11':       ( 'uint', 'both',   ),
    'oz.r12':       ( 'uint', 'both',   ),
    'oz.r13':       ( 'uint', 'both',   ),
    'oz.r14':       ( 'uint', 'both',   ),
    'oz.r15':       ( 'uint', 'both',   ),
    # Control registers.
    'cr0':          ( 'cr0',  'both',   ),
    'cr4':          ( 'cr4',  'both',   ),
    'xcr0':         ( 'xcr0', 'both',   ),
    # FPU Registers
    'fcw':          ( 'uint', 'both',   ),
    'fsw':          ( 'uint', 'both',   ),
    'ftw':          ( 'uint', 'both',   ),
    'fop':          ( 'uint', 'both',   ),
    'fpuip':        ( 'uint', 'both',   ),
    'fpucs':        ( 'uint', 'both',   ),
    'fpudp':        ( 'uint', 'both',   ),
    'fpuds':        ( 'uint', 'both',   ),
    'mxcsr':        ( 'uint', 'both',   ),
    'st0':          ( 'uint', 'both',   ),
    'st1':          ( 'uint', 'both',   ),
    'st2':          ( 'uint', 'both',   ),
    'st3':          ( 'uint', 'both',   ),
    'st4':          ( 'uint', 'both',   ),
    'st5':          ( 'uint', 'both',   ),
    'st6':          ( 'uint', 'both',   ),
    'st7':          ( 'uint', 'both',   ),
    # MMX registers.
    'mm0':          ( 'uint', 'both',   ),
    'mm1':          ( 'uint', 'both',   ),
    'mm2':          ( 'uint', 'both',   ),
    'mm3':          ( 'uint', 'both',   ),
    'mm4':          ( 'uint', 'both',   ),
    'mm5':          ( 'uint', 'both',   ),
    'mm6':          ( 'uint', 'both',   ),
    'mm7':          ( 'uint', 'both',   ),
    # SSE registers.
    'xmm0':         ( 'uint', 'both',   ),
    'xmm1':         ( 'uint', 'both',   ),
    'xmm2':         ( 'uint', 'both',   ),
    'xmm3':         ( 'uint', 'both',   ),
    'xmm4':         ( 'uint', 'both',   ),
    'xmm5':         ( 'uint', 'both',   ),
    'xmm6':         ( 'uint', 'both',   ),
    'xmm7':         ( 'uint', 'both',   ),
    'xmm8':         ( 'uint', 'both',   ),
    'xmm9':         ( 'uint', 'both',   ),
    'xmm10':        ( 'uint', 'both',   ),
    'xmm11':        ( 'uint', 'both',   ),
    'xmm12':        ( 'uint', 'both',   ),
    'xmm13':        ( 'uint', 'both',   ),
    'xmm14':        ( 'uint', 'both',   ),
    'xmm15':        ( 'uint', 'both',   ),
    'xmm0.lo':      ( 'uint', 'both',   ),
    'xmm1.lo':      ( 'uint', 'both',   ),
    'xmm2.lo':      ( 'uint', 'both',   ),
    'xmm3.lo':      ( 'uint', 'both',   ),
    'xmm4.lo':      ( 'uint', 'both',   ),
    'xmm5.lo':      ( 'uint', 'both',   ),
    'xmm6.lo':      ( 'uint', 'both',   ),
    'xmm7.lo':      ( 'uint', 'both',   ),
    'xmm8.lo':      ( 'uint', 'both',   ),
    'xmm9.lo':      ( 'uint', 'both',   ),
    'xmm10.lo':     ( 'uint', 'both',   ),
    'xmm11.lo':     ( 'uint', 'both',   ),
    'xmm12.lo':     ( 'uint', 'both',   ),
    'xmm13.lo':     ( 'uint', 'both',   ),
    'xmm14.lo':     ( 'uint', 'both',   ),
    'xmm15.lo':     ( 'uint', 'both',   ),
    'xmm0.hi':      ( 'uint', 'both',   ),
    'xmm1.hi':      ( 'uint', 'both',   ),
    'xmm2.hi':      ( 'uint', 'both',   ),
    'xmm3.hi':      ( 'uint', 'both',   ),
    'xmm4.hi':      ( 'uint', 'both',   ),
    'xmm5.hi':      ( 'uint', 'both',   ),
    'xmm6.hi':      ( 'uint', 'both',   ),
    'xmm7.hi':      ( 'uint', 'both',   ),
    'xmm8.hi':      ( 'uint', 'both',   ),
    'xmm9.hi':      ( 'uint', 'both',   ),
    'xmm10.hi':     ( 'uint', 'both',   ),
    'xmm11.hi':     ( 'uint', 'both',   ),
    'xmm12.hi':     ( 'uint', 'both',   ),
    'xmm13.hi':     ( 'uint', 'both',   ),
    'xmm14.hi':     ( 'uint', 'both',   ),
    'xmm15.hi':     ( 'uint', 'both',   ),
    'xmm0.lo.zx':   ( 'uint', 'both',   ),
    'xmm1.lo.zx':   ( 'uint', 'both',   ),
    'xmm2.lo.zx':   ( 'uint', 'both',   ),
    'xmm3.lo.zx':   ( 'uint', 'both',   ),
    'xmm4.lo.zx':   ( 'uint', 'both',   ),
    'xmm5.lo.zx':   ( 'uint', 'both',   ),
    'xmm6.lo.zx':   ( 'uint', 'both',   ),
    'xmm7.lo.zx':   ( 'uint', 'both',   ),
    'xmm8.lo.zx':   ( 'uint', 'both',   ),
    'xmm9.lo.zx':   ( 'uint', 'both',   ),
    'xmm10.lo.zx':  ( 'uint', 'both',   ),
    'xmm11.lo.zx':  ( 'uint', 'both',   ),
    'xmm12.lo.zx':  ( 'uint', 'both',   ),
    'xmm13.lo.zx':  ( 'uint', 'both',   ),
    'xmm14.lo.zx':  ( 'uint', 'both',   ),
    'xmm15.lo.zx':  ( 'uint', 'both',   ),
    'xmm0.dw0':     ( 'uint', 'both',   ),
    'xmm1.dw0':     ( 'uint', 'both',   ),
    'xmm2.dw0':     ( 'uint', 'both',   ),
    'xmm3.dw0':     ( 'uint', 'both',   ),
    'xmm4.dw0':     ( 'uint', 'both',   ),
    'xmm5.dw0':     ( 'uint', 'both',   ),
    'xmm6.dw0':     ( 'uint', 'both',   ),
    'xmm7.dw0':     ( 'uint', 'both',   ),
    'xmm8.dw0':     ( 'uint', 'both',   ),
    'xmm9.dw0':     ( 'uint', 'both',   ),
    'xmm10.dw0':    ( 'uint', 'both',   ),
    'xmm11.dw0':    ( 'uint', 'both',   ),
    'xmm12.dw0':    ( 'uint', 'both',   ),
    'xmm13.dw0':    ( 'uint', 'both',   ),
    'xmm14.dw0':    ( 'uint', 'both',   ),
    'xmm15.dw0':    ( 'uint', 'both',   ),
    # Misspelled variant of 'xmm15.dw0' that used to be the only spelling
    # accepted; retained so that anything still using it keeps working.
    'xmm15_dw0':    ( 'uint', 'both',   ),
    # AVX registers.
    'ymm0':         ( 'uint', 'both',   ),
    'ymm1':         ( 'uint', 'both',   ),
    'ymm2':         ( 'uint', 'both',   ),
    'ymm3':         ( 'uint', 'both',   ),
    'ymm4':         ( 'uint', 'both',   ),
    'ymm5':         ( 'uint', 'both',   ),
    'ymm6':         ( 'uint', 'both',   ),
    'ymm7':         ( 'uint', 'both',   ),
    'ymm8':         ( 'uint', 'both',   ),
    'ymm9':         ( 'uint', 'both',   ),
    'ymm10':        ( 'uint', 'both',   ),
    'ymm11':        ( 'uint', 'both',   ),
    'ymm12':        ( 'uint', 'both',   ),
    'ymm13':        ( 'uint', 'both',   ),
    'ymm14':        ( 'uint', 'both',   ),
    'ymm15':        ( 'uint', 'both',   ),

    # Special ones.
    'value.xcpt':   ( 'uint', 'output', ),
};
1083
def __init__(self, sField, sOp, sValue, sType):
    """
    Creates one field modifier.

    sField must be one of the kdFields keys and sOp one of the kasOperators
    entries; all four arguments are expected to be plain strings.  These
    expectations are enforced with asserts only.
    """
    assert sField in self.kdFields;
    assert sOp in self.kasOperators;
    (self.sField, self.sOp, self.sValue, self.sType) = (sField, sOp, sValue, sType);
    # Type checks come last and in this order: field, operator, type, value.
    for sArg in (sField, sOp, sType, sValue):
        assert isinstance(sArg, str);
1095
1096
class TestSelector(object):
    """
    A single selector of an instruction test.

    A selector is a ( variable, compare operator, value ) triplet, for
    instance ( 'ring', '==', '0' ).
    """
    ## The compare operators a selector may use.
    kasCompareOps = [ '==', '!=' ];
    ## The selector variables, each mapping its valid values to an identifier
    ## friendly name.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Shorthand predicates and the variable expressions they stand for.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.

        sVariable must be a kdVariables key, sOp a kasCompareOps entry and
        sValue a valid value for the variable (asserted).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        (self.sVariable, self.sOp, self.sValue) = (sVariable, sOp, sValue);
1185
1186
1187class InstructionTest(object):
1188 """
1189 Instruction test.
1190 """
1191
1192 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1193 self.oInstr = oInstr; # type: InstructionTest
1194 self.aoInputs = []; # type: list(TestInOut)
1195 self.aoOutputs = []; # type: list(TestInOut)
1196 self.aoSelectors = []; # type: list(TestSelector)
1197
1198 def toString(self, fRepr = False):
1199 """
1200 Converts it to string representation.
1201 """
1202 asWords = [];
1203 if self.aoSelectors:
1204 for oSelector in self.aoSelectors:
1205 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1206 asWords.append('/');
1207
1208 for oModifier in self.aoInputs:
1209 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1210
1211 asWords.append('->');
1212
1213 for oModifier in self.aoOutputs:
1214 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1215
1216 if fRepr:
1217 return '<' + ' '.join(asWords) + '>';
1218 return ' '.join(asWords);
1219
1220 def __str__(self):
1221 """ Provide string represenation. """
1222 return self.toString(False);
1223
1224 def __repr__(self):
1225 """ Provide unambigious string representation. """
1226 return self.toString(True);
1227
class Operand(object):
    """
    A single instruction operand, described by where it is encoded and its type.
    """

    def __init__(self, sWhere, sType):
        """
        Creates an operand.

        sWhere must be a key of g_kdOpLocations and sType a key of
        g_kdOpTypes; the offending value is used as the assertion message.
        """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the operand type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        sKind = self.sType[0];
        return sKind == 'E' or sKind == 'G' or sKind == 'M';
1242
1243
1244
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds what has been collected about one instruction: the specification
    attributes (mnemonic, operands, opcode, flags, hints, tests, ...), the
    implementation attributes (statistics and function names, stub status)
    and bookkeeping about where in the source file it was found.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile is the source file name and iLine the line number at which
        the record was created.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs.  Fields
        whose value is None are left out, and several fields are only
        included when set/non-empty.  With fRepr set, the whole thing is
        wrapped in angle brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # Anything longer than the optional '<' means a field precedes this one.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms:
            - '0xNN':   the hex value itself.
            - '/r':     r << 3.
            - '11/r':   (r << 3) | 0xc0.
            - '!11/r':  (r << 3) | 0x80.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (two characters: the slash and the digit):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns the value with bit 7 set, i.e. 0x80 or'ed in):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        which is then looked up in g_kdX86EFlagsConstants.  None and empty
        lists yield 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1418
1419
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Indexed by map name; each entry names the lead opcode byte(s) that select
## the map and, for groups, how the ModR/M byte selects the entry.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 at 0x80 is selected by ModR/M.reg just like 0x81 thru 0x83.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    # Group 17 is VEX.0F38 F3 /r, so it hangs off VEX map 2 rather than map 1.
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex2', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1492
1493
1494
class ParserException(Exception):
    """ Exception raised by the parser; carries a single message string. """

    def __init__(self, sMessage):
        """ Initializes the exception with sMessage as its sole argument. """
        super(ParserException, self).__init__(sMessage);
1499
1500
1501class SimpleParser(object):
1502 """
1503 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1504 """
1505
1506 ## @name Parser state.
1507 ## @{
1508 kiCode = 0;
1509 kiCommentMulti = 1;
1510 ## @}
1511
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser for one source file.

    sSrcFile is the file name used in messages, asLines the lines of the
    file and sDefaultMap the name of the instruction map (a key of
    g_dInstructionMaps, asserted) used for instructions not naming any map.
    """
    # Input and parser state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation expressions.
    self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # The tag handlers, listed as ( handler, tags served by it ).
    self.dTagHandlers   = {};
    for fnHandler, asTags in (
        ( self.parseTagOpBrief,         ( '@opbrief', ) ),
        ( self.parseTagOpDesc,          ( '@opdesc', ) ),
        ( self.parseTagOpMnemonic,      ( '@opmnemonic', ) ),
        ( self.parseTagOpOperandN,      ( '@op1', '@op2', '@op3', '@op4', ) ),
        ( self.parseTagOpPfx,           ( '@oppfx', ) ),
        ( self.parseTagOpMaps,          ( '@opmaps', ) ),
        ( self.parseTagOpcode,          ( '@opcode', ) ),
        ( self.parseTagOpcodeSub,       ( '@opcodesub', ) ),
        ( self.parseTagOpEnc,           ( '@openc', ) ),
        ( self.parseTagOpEFlags,        ( '@opfltest', '@opflmodify', '@opflundef', '@opflset', '@opflclear', ) ),
        ( self.parseTagOpHints,         ( '@ophints', ) ),
        ( self.parseTagOpDisEnum,       ( '@opdisenum', ) ),
        ( self.parseTagOpMinCpu,        ( '@opmincpu', ) ),
        ( self.parseTagOpCpuId,         ( '@opcpuid', ) ),
        ( self.parseTagOpGroup,         ( '@opgroup', ) ),
        ( self.parseTagOpUnusedInvalid, ( '@opunused', '@opinvalid', '@opinvlstyle', ) ),
        ( self.parseTagOpTest,          ( '@optest', ) ),
        ( self.parseTagOpTestIgnore,    ( '@optestign', '@optestignore', ) ),
        ( self.parseTagOpCopyTests,     ( '@opcopytests', ) ),
        ( self.parseTagOpOnlyTest,      ( '@oponly', '@oponlytest', ) ),
        ( self.parseTagOpXcptType,      ( '@opxcpttype', ) ),
        ( self.parseTagOpStats,         ( '@opstats', ) ),
        ( self.parseTagOpFunction,      ( '@opfunction', ) ),
        ( self.parseTagOpDone,          ( '@opdone', ) ),
    ):
        for sTag in asTags:
            self.dTagHandlers[sTag] = fnHandler;

    # The numbered test tags, in both the plain and the bracketed spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    self.asErrors = [];
1578
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1584
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException like raiseError does, except that the reported
    line number is self.iCommentLine + iLineInComment.
    """
    iReportLine  = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,);
    raise ParserException(sFullMessage);
1590
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors
    without raising anything.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1598
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error in self.asErrors for a line inside the current comment,
    the reported line number being self.iCommentLine + iLineInComment.
    Returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1606
def printErrors(self):
    """
    Writes all recorded errors to stderr in one go (nothing is written when
    there are none).
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1615
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix, provided self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1622
1623
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global instruction
    list and the list of current instructions.

    The instruction is created for line iLine, or for the current line
    (self.iLine) when iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1632
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when oInstr already has a mnemonic.  Otherwise the
    lowercased first '_' separated word becomes the mnemonic and, when the
    instruction has no operands yet, each following word found in
    g_kdOpTypes is added as an operand.

    Returns False on the first word that is not a known operand type
    (operands added before it are kept), True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asWords = sStats.split('_');
    oInstr.sMnemonic = asWords[0].lower();
    if len(asWords) <= 1 or oInstr.aoOperands:
        return True;

    for sType in asWords[1:]:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1648
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line of oInstr (which must not have been
    completed before), fills in whatever can be derived for attributes that
    are still unset, and registers the instruction with its maps and the
    global stats/function dictionaries.  Tagged and untagged (legacy)
    instructions get the same treatment.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    def operandSuffix():
        """ Returns '_<type>' for every operand with a type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats (or else the function name) if the former is missing.
    if oInstr.sMnemonic is None:
        sDeriveFrom = oInstr.sStats;
        if sDeriveFrom is None and oInstr.sFunction is not None:
            sDeriveFrom = oInstr.sFunction.replace('iemOp_', '');
        if sDeriveFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sDeriveFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or else
    # from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandSuffix();

    # Derive the IEM function name from mnemonic and operand types, or else
    # from the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.
    if oInstr.sEncoding is None:
        aoOperands = oInstr.aoOperands;
        if not aoOperands:
            oInstr.sEncoding = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
        elif aoOperands[0].usesModRM():
            fVexOperand = len(aoOperands) >= 2 and aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVexOperand else 'ModR/M';

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1751
def doneInstructions(self, iLineInComment = None):
    """
    Done with current instruction(s).

    Completes every current instruction, updates the stub and instruction
    counters, and resets the comment text and the current instruction list.
    The completion line is the current line, or the comment line plus
    iLineInComment when the latter is given.  Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    aoDone = self.aoCurInstrs;
    for oInstr in aoDone:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(aoDone);

    self.sComment     = '';
    self.aoCurInstrs  = [];
    return True;
1766
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instruction to oValue.  Unless
    fOverwrite is True, only attributes currently holding None are
    replaced (other values, empty strings included, are left alone).
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1778
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets the iEntry of the array sAttrib of all current instruction to oValue.
    The array is padded with None entries as needed.  Unless fOverwrite is
    True, only entries currently holding None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if aoArray[iEntry] is None or fOverwrite is True:
            aoArray[iEntry] = oValue;
1790
def parseCommentOldOpcode(self, asLines):
    """
    Deals with old style 'Opcode 0xff /4' like comments.

    When the first line in asLines consists of the word 'Opcode' followed
    by a word starting with '0x' or '0X', everything after 'Opcode' is
    recorded as the sRawOldOpcodes attribute of the current instructions
    (via setInstrunctionAttrib, i.e. existing values are not overwritten).
    Words with an upper case '0X' prefix are normalized to '0x'.

    Returns False (always).
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1806
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction to attach the op-tag being parsed to.

    If no instruction is being collected, one is added for the line
    self.iCommentLine + iTagLine.  The tag counter (cOpTags) of every
    current instruction is bumped, and self.cTotalTagged is incremented for
    each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        cTags = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cTags;
        if cTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1816
@staticmethod
def flattenSections(aasSections):
    """
    Turns each multiline section into a single string.

    The lines of a section are stripped and joined with single spaces.
    Sections without any lines are dropped.

    Returns a list of strings, one string per non-empty section.
    """
    return [' '.join([sLine.strip() for sLine in asSection])
            for asSection in aasSections
            if asSection];
1828
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The lines of each section are stripped and joined using sLineSep, and
    the non-empty sections are then joined using sSectionSep (newline by
    default).  Sections without any lines are skipped entirely, so the
    result never starts or ends with a section separator.

    Returns the flattened string.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    # Only put separators between sections that actually have content lines,
    # otherwise leading empty sections would yield a leading separator.
    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if asLines]);
1846
1847
1848
1849 ## @name Tag parsers
1850 ## @{
1851
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing dot is added when missing.  An instruction can only be given
    one brief description.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first dot ending a sentence, i.e. one that is followed by a
    # space or that is the last character.  Dots inside words are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing description.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1881
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each section is flattened into a
    single string (see flattenSections) and appended to the description
    sections of the instruction.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
1896
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    An instruction can only be given one mnemonic.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate the flattened value first, then check for an earlier assignment.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                 % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
1919
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  When
    omitted, the default is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand which has already been specified cannot be overwritten.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        # No explicit location given, use the default for the type.
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1969
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  Each name must be a key in
    g_dInstructionMaps.  Whitespace around the names and empty list entries
    (e.g. from a trailing comma) are ignored.

    There is a default map associated with each input file.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2004
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' as is.
    Anything else must pass _isValidOpcodeByte (a two character value gets
    a '0x' prefix first).  Values other than 'n/a' must also be a key in
    g_kdPrefixes.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2042
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted is anything passing _isValidOpcodeByte, as well as '/N',
    '11/N' and '!11/N' where N is a single digit in the range 0 thru 7 (the
    ModR/M reg field is three bits wide).

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # Check the /reg forms: a known lead-in followed by exactly one digit.
        for sLeadIn in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLeadIn) + 1 \
               and sOpcode.startswith(sLeadIn) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2072
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored is the first
    element of the matching entry.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Look up the flattened value and translate it.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sValue = g_kdSubOpcodes[sValue][0];

    # Only one sub opcode per instruction.
    sOldValue = oInstr.sSubOpcode;
    if sOldValue is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldValue, sValue,));
    oInstr.sSubOpcode = sValue;
    return True;
2098
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings or
    g_dInstructionMaps, or if it passes _isValidOpcodeByte.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown = sNewEncoding in g_kdEncodings \
          or sNewEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Set it, unless already set.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;
    return True;
2125
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags uses this to find the instruction attribute that a
## given EFLAGS tag reads and updates.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
2134
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flag names, each of which must
    be a key in g_kdEFlagsMnemonics (surrounding whitespace is tolerated).
    The single word 'none' (any case) gives an empty list.  The resulting
    list is stored in the instruction attribute that kdOpFlagToAttr
    associates with the tag, provided that attribute is still None.

    Returns True on success, False or the result of errorComment on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag in range(len(asFlags)):
            sFlag = asFlags[iFlag];
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;

    # Set them, but never replace a previous assignment.
    sAttrib    = self.kdOpFlagToAttr[sTag];
    asPrevious = getattr(oInstr, sAttrib);
    if asPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
2166
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints.  The single word 'none' (any
    case) means no hints.  Valid hints are added to the dHints dictionary
    of the instruction; a hint that is already present is reported as an
    error but does not cause the tag to fail.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry (strings are immutable and cannot be assigned into).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2200
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.  The value must match self.oReDisEnum and can only be
    set once per instruction.

    When more than one word is given, an error is reported and the first
    word is used.

    Returns True on success, False or the result of errorComment on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sNewEnum = asWords[0];
    if not self.oReDisEnum.match(sNewEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewEnum, self.oReDisEnum.pattern));

    # Set it, unless already set.
    sOldEnum = oInstr.sDisEnum;
    if sOldEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldEnum, sNewEnum,));
    oInstr.sDisEnum = sNewEnum;
    return True;
2229
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key in g_kdCpuNames.  When more than one word is
    given, an error is reported and the first word is used.  Specifying a
    different CPU for an instruction that already has one is reported as an
    error and the existing value is kept.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Must not be assigned before this check, or conflicting values
    # would never be detected.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2259
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key in g_kdCpuIdFlags.  The single word 'none' (any case) means no
    flags.  Valid flags are appended to the asCpuIds list of the
    instruction; a flag that is already present is reported as an error but
    does not cause the tag to fail.

    Returns True on success, False if any flag is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry (strings are immutable and cannot be assigned into).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2293
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName must be given, and it can only be set once per
    instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Split the flattened value into words, there must be exactly one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    sNewGroup = asGroups[0];
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Set it, unless already set.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;
    return True;
2319
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The style must be a key in g_kdInvalidStyles and can only be given once
    per instruction, whichever of the three tags is used.  For @opunused
    and @opinvalid the fUnused respectively fInvalid attribute is set too.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Split the flattened value into words, there must be exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sNewStyle = asStyles[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Set it, unless one of the tags already did so.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
2357
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  The words of a section are
    sorted into selectors, inputs and outputs using the '/' and '->'
    separator words, each of which may occur only once.  Tests that parse
    without problems are appended to the aoTests list of the instruction,
    while problems are reported thru errorComment.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front: outputs first, then inputs, then selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  Note! The lists must be compared by
            # identity here, as two different lists may have equal content.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands into a full condition expression.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix, otherwise
                        # the default type of the field is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break; # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2502
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is expected to equal the number of tests the instruction
    already has; a mismatch is reported thru errorComment.  The tag value
    is then processed by parseTagOpTest either way.

    Returns whatever parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number following '@optest', with or without brackets.
    if sTag[-1] == ']':
        sNumber = sTag[8:-1];
    else:
        sNumber = sTag[7:];
    iTest  = int(sNumber);
    cTests = len(oInstr.aoTests);

    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2518
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: the
    tag and its value are accepted without doing anything.

    See also @oponlytest.

    Returns True.
    """
    # Nothing to do, just reference the arguments to keep pylint quiet.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2530
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references are appended to the asCopyTests list of the
    instruction.  Invalid and duplicate references are reported as errors,
    but do not cause the tag to fail.

    Returns True, unless no reference is given at all in which case the
    result of errorComment is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2559
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already in there.  The tag does not take any value.

    See also @optestignore.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag must not have any value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Add the instruction to the only-test list, once.
    fListed = oInstr in g_aoOnlyTestInstructions;
    if not fListed:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2582
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type, which must be a key in g_kdXcptTypes, is required and
    it can only be set once per instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Split the flattened value into words, there must be exactly one valid type.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    sNewType = asTypes[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Set it, unless already set.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));
    oInstr.sXcptType = sNewType;
    return True;
2609
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying
    instructions whose implementation isn't following immediately or that
    aren't implemented yet.

    The name must match self.oReFunctionName and can only be set once per
    instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Set it, unless already set.
    sOldName = oInstr.sFunction;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sOldName, sNewName,));
    oInstr.sFunction = sNewName;
    return True;
2637
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying
    instructions whose implementation isn't following immediately or that
    aren't implemented yet.

    The name must match self.oReStatsName and can only be set once per
    instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Set it, unless already set.
    sOldStats = oInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sOldStats, sNewStats,));
    oInstr.sStats = sNewStats;
    return True;
2665
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() if the tag was given a value.
    """
    _ = iEndLine;

    # The tag takes no value; anything found is reported and nothing is flushed.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2678
2679 ## @}
2680
2681
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, decoration (leading blanks and
    asterisks) is stripped, and the text is divided into tags (lines
    starting with '@') with their sections (paragraphs separated by empty
    lines).  Each tag is then dispatched via self.dTagHandlers.  Text
    preceding the first tag belongs to the implicit '@default' tag.

    Every tag, including the last one in the comment, goes through the same
    dispatch code, so misspelled tags get the 'Did you mean ...' diagnostics
    regardless of their position.

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore ignored, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines, keeping iCommentLine in
    # sync with the first line that remains.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    dState = { 'cOpTags': 0, 'sFlatDefault': None, };

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """ Dispatches one completed tag; shared by the loop and the final tag. """
        # An empty trailing section is just the blank line before the next tag.
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            dState['cOpTags'] += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            dState['sFlatDefault'] = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));

    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            # A new tag starts here, so the previous one is complete.
            processTag(sCurTag, aasSections, iCurTagLine, iLine);

            asSplit      = sLine.split(None, 1);
            sCurTag      = asSplit[0].lower();
            asCurSection = [asSplit[1],] if len(asSplit) > 1 else [];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends the current section (if it has any content).
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.  The end line is the index of the last line.
    #
    processTag(sCurTag, aasSections, iCurTagLine, len(asLines) - 1);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if dState['cOpTags'] > 0 and dState['sFlatDefault']:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (dState['sFlatDefault'],));

    return True;
2782
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, following lines are pulled in from self.asLines
    starting at index self.iLine (neither attribute is modified).

    Commas and parentheses inside string and character literals are not
    treated as argument separators or nesting, so an argument such as
    "add Eb,Gb" stays in one piece.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  On failure the problem is reported via
    self.error() and the second element is None.

    Calls self.raiseError() if there is no opening parenthesis or nothing
    precedes it.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (offOpen, None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;            # The quote character when inside a literal, else None.
    while cDepth > 0:
        # Pull in more lines when running off the end of what we have.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote is not None:
            if ch == '\\':
                off += 1;       # Skip the escaped character as well.
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\''):
            chQuote = ch;
        elif ch in (',', ')'):
            # Only top-level separators delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2824
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is followed
    (optionally after whitespace) by an opening parenthesis, parses it using
    parseMacroInvocation.

    Returns (len(sCode), None) if not found, otherwise a tuple of the offset
    into sCode following the invocation and the parseMacroInvocation
    argument list.
    """
    offHit = sCode.find(sMacro);
    # Note: startswith() rather than indexing, as nothing at all may follow
    #       the macro name (indexing an empty string raises IndexError).
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oResult = self.parseMacroInvocation(sCode[offHit:]);
        # Be tolerant of a failure indication that isn't an (offset, args) tuple.
        if isinstance(oResult, tuple):
            offAfter, asRet = oResult;
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2834
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2840
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    for which findAndParseMacroInvocation finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazily evaluated, so macros following the first hit are never tried.
    aoResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in aoResults if asRet is not None), None);
2850
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty).
    Problems are reported via self.error() and do not stop processing.

    Always returns True.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: reuse what @opN specified, or fall back on a plain register operand.
            if iOperand < len(oInstr.aoOperands):
                oKnown = oInstr.aoOperands[iOperand];
                sWhere = oKnown.sWhere;
                sType  = oKnown.sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';

        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        else:
            oKnown = oInstr.aoOperands[iOperand];
            if oKnown.sWhere != sWhere or oKnown.sType != sType:
                self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                           % (sMacro, iOperand + 1, iOperand + 1, oKnown.sWhere, oKnown.sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        oForm = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oForm[0];
        elif oForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oForm, sForm));

        # Check the parameter locations for the encoding.
        asFormWhere = oForm[1];
        if asFormWhere is not None:
            if len(asFormWhere) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints are handled first, then the IEM ones.
    for sHintExpr, sPrefix, sFmtUnknown, sFmtOther in (
            (sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s'),
            (sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s'), ):
        for sHint in sHintExpr.split('|'):
            sHint = sHint.strip();
            if not sHint.startswith(sPrefix):
                # A literal zero stands for 'no hints'.
                if sHint != '0':
                    self.error(sFmtOther % (sMacro, sHint,));
                continue;
            sShortHint = sHint[len(sPrefix):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error(sFmtUnknown % (sMacro, sHint,));

    return True;
2968
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry no statistics name or assembly string, so both are
    derived from the lower case mnemonic and the operand list before handing
    over to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2978
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if a FNIEMOP_XXX function definition/stub macro was found
    and processed, False otherwise (also when IEMOP_MNEMONIC* macros were
    processed).
    """
    # Nothing to scan unless there is a parenthesis somewhere after the first character.
    if sCode.find('(') <= 0:
        return False;

    # Look for instruction decoder function definitions.  ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];
        sDefMacro = asArgs[0];
        fStub     = sDefMacro.find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sDefMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sDefMacro.find('UD_STUB') > 0, fOverwrite = True);
        # A stub has no body to wait for, so the instructions are complete now.
        if fStub:
            self.doneInstructions();
        return True;

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # The position in the tuple is the operand count <N>.
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                        'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX',)):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is not None:
            offHints = 6 + cOperands;   # The two hint arguments follow the operands.
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[offHints], asArgs[offHints + 1], asArgs[6:offHints]);

    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                        'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4',)):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is not None:
            offHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[offHints], asArgs[offHints + 1], asArgs[4:offHints]);

    return False;
3072
3073
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, collecting multi-line
    comments in self.sComment (handed to parseComment when complete) and
    passing code to checkCodeForMacro.

    Returns number or errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');

        if offSlash < 0:
            if self.iState == self.kiCommentMulti:
                # No slash, but append the line if in multi-line comment.
                self.sComment += sLine;
            elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
                # No slash, but check code line for relevant macro.
                self.checkCodeForMacro(sLine);
            elif self.iState == self.kiCode and sLine[0] == '}':
                # If the line is a '}' in the first position, complete the instructions.
                self.doneInstructions();
            continue;

        # C++ line comment starting at the first slash while in code: Only
        # the code preceding it (if any) is of interest.
        if offSlash + 1 < len(sLine) and sLine[offSlash + 1] == '/' and self.iState == self.kiCode:
            if offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);
            continue;

        # Otherwise work thru the line, switching between code and comment state.
        cchLine = len(sLine);
        offLine = 0;
        while offLine < cchLine:
            if self.iState == self.kiCode:
                offHit = sLine.find('/*', offLine); # only multiline comments for now.
                if offHit < 0:
                    self.checkCodeForMacro(sLine[offLine:]);
                    offLine = cchLine;
                else:
                    self.checkCodeForMacro(sLine[offLine:offHit]);
                    self.sComment     = '';
                    self.iCommentLine = self.iLine;
                    self.iState       = self.kiCommentMulti;
                    offLine = offHit + 2;

            elif self.iState == self.kiCommentMulti:
                offHit = sLine.find('*/', offLine);
                if offHit < 0:
                    self.sComment += sLine[offLine:];
                    offLine = cchLine;
                else:
                    self.sComment += sLine[offLine:offHit];
                    self.iState = self.kiCode;
                    offLine = offHit + 2;
                    self.parseComment();
            else:
                assert False;

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3139
3140
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specfications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A
    ParserException is printed before being passed on to the caller.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # closes the file whether reading succeeds or not.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Other exceptions simply propagate.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3171
3172
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up by statistics name first and by function
    name second, and the tests of the instructions found are appended to
    the referencing instruction.  References that are not found, or that
    resolve to the referencing instruction itself, are reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3200
3201
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3213
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  After
    parsing, the test copying and only-test processing is done.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted.  Raises exception on failure.
    """
    # (default map, file name) pairs, parsed in this order.
    aasFiles = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    for sDefaultMap, sName in aasFiles:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors == 0:
        return True;
    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1);
    return True;
3240
3241
3242
# Parse all the instruction files when this module is loaded.  Note that
# __parseAll() exits the process via sys.exit(1) if any errors were counted.
__parseAll();
3244
3245
3246#
3247# Generators (may perhaps move later).
3248#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are sorted by encoding and
    lead opcodes, and a DISOPCODE table is written to oDstFile.  For now
    only the table for the first map is generated.
    """

    def getMapSortKey(aKV):
        """ Sort key for the (name, map) pairs. """
        return aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes);

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()), key = getMapSortKey):
        assert oMap.sName == sName;

        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        # The offsets at which the columns of a table entry start.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Row marker comment for every 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                continue;   # Nothing is emitted for empty slots.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr,));
                continue;

            if len(oInstr.aoOperands) > 3:
                sMacro       = 'OPVEX'
                cMaxOperands = 4;
            else:
                sMacro       = 'OP';
                cMaxOperands = 3;
            assert len(oInstr.aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            asOpFmts = [];
            for oOperand in oInstr.aoOperands:
                sOpFmt = g_kdOpTypes[oOperand.sType][2];
                if sOpFmt[0] != '%':        ## @todo remove upper() later.
                    sOpFmt = sOpFmt.upper();
                asOpFmts.append(sOpFmt);
            sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asOpFmts:
                sTmp += ' ' + ','.join(asOpFmts);
            sTmp += '",';
            asColumns = [ sTmp, ];

            #
            # Decoders.
            #
            iStart = len(asColumns);
            if oInstr.sEncoding is None:
                pass;
            elif oInstr.sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif oInstr.sEncoding in [ 'prefix', ]:
                asColumns.extend(['0,'] * len(oInstr.aoOperands));
            elif oInstr.sEncoding in [ 'fixed' ]:
                pass;
            elif oInstr.sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif oInstr.sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif oInstr.sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
            else:
                ## @todo IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
                #        IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
                #        IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                #        IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
                #        IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands.
            for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            iStart = len(asColumns)
            asColumns.extend(['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands]);
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Flags.  Only the DISOPTYPE_ hints go into the table.
            #
            asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
            asDefines = [sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')];
            asColumns.append((' | '.join(asDefines) if asDefines else '0') + '),');

            #
            # Format the columns into a line.  Each column is padded up to
            # its offset, or separated by a single space if already past it.
            #
            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #    DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #  { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                offColumn = aoffColumns[iColumn];
                if len(sLine) < offColumn:
                    sLine = sLine.ljust(offColumn);
                else:
                    sLine += ' ';
                sLine += sColumn;

            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3412
# When run as a script (rather than imported), write the disassembler tables
# to the default destination of generateDisassemblerTables, i.e. sys.stdout.
if __name__ == '__main__':
    generateDisassemblerTables();
3415
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette