VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66464

Last change on this file since 66464 was 66464, checked in by vboxsync, 8 years ago

IEM: Implemented movss Vss,Wss (f3 0f 10).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 140.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66464 2017-04-06 19:22:01Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66464 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
55g_kdX86EFlagsConstants = {
56 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
57 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
58 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
59 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
60 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
61 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
62 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
63 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
64 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
65 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
66 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
67 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
68 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
69 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
70 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
71 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
72 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
73 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
74 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
75 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
76};
77
78## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
79g_kdEFlagsMnemonics = {
80 # Debugger flag notation (sorted by value):
81 'cf': 'X86_EFL_CF', ##< Carry Flag.
82 'nc': '!X86_EFL_CF', ##< No Carry.
83
84 'po': 'X86_EFL_PF', ##< Parity Odd.
85 'pe': '!X86_EFL_PF', ##< Parity Even.
86
87 'af': 'X86_EFL_AF', ##< Aux Flag.
88 'na': '!X86_EFL_AF', ##< No Aux.
89
90 'zr': 'X86_EFL_ZF', ##< ZeRo.
91 'nz': '!X86_EFL_ZF', ##< No Zero.
92
93 'ng': 'X86_EFL_SF', ##< NeGative (sign).
94 'pl': '!X86_EFL_SF', ##< PLus (sign).
95
96 'tf': 'X86_EFL_TF', ##< Trap flag.
97
98 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
99 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
100
101 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
102 'up': '!X86_EFL_DF', ##< UP (string op direction).
103
104 'ov': 'X86_EFL_OF', ##< OVerflow.
105 'nv': '!X86_EFL_OF', ##< No Overflow.
106
107 'nt': 'X86_EFL_NT', ##< Nested Task.
108 'rf': 'X86_EFL_RF', ##< Resume Flag.
109 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
110 'ac': 'X86_EFL_AC', ##< Alignment Check.
111 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
112 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
113
114 # Reference manual notation not covered above (sorted by value):
115 'pf': 'X86_EFL_PF',
116 'zf': 'X86_EFL_ZF',
117 'sf': 'X86_EFL_SF',
118 'if': 'X86_EFL_IF',
119 'df': 'X86_EFL_DF',
120 'of': 'X86_EFL_OF',
121 'iopl': 'X86_EFL_IOPL',
122 'id': 'X86_EFL_ID',
123};
124
125## Constants and values for CR0.
126g_kdX86Cr0Constants = {
127 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
128 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
129 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
130 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
131 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
132 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
133 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
134 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
135 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
136 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
137 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
138};
139
140## Constants and values for CR4.
141g_kdX86Cr4Constants = {
142 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
143 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
144 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
145 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
146 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
147 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
148 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
149 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
150 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
151 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
152 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
153 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
154 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
155 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
156 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
157 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
158 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
159 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
160};
161
162## XSAVE components (XCR0).
163g_kdX86XSaveCConstants = {
164 'XSAVE_C_X87': 0x00000001,
165 'XSAVE_C_SSE': 0x00000002,
166 'XSAVE_C_YMM': 0x00000004,
167 'XSAVE_C_BNDREGS': 0x00000008,
168 'XSAVE_C_BNDCSR': 0x00000010,
169 'XSAVE_C_OPMASK': 0x00000020,
170 'XSAVE_C_ZMM_HI256': 0x00000040,
171 'XSAVE_C_ZMM_16HI': 0x00000080,
172 'XSAVE_C_PKRU': 0x00000200,
173 'XSAVE_C_LWP': 0x4000000000000000,
174 'XSAVE_C_X': 0x8000000000000000,
175 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
176 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
177};
178
179
180## \@op[1-4] locations
181g_kdOpLocations = {
182 'reg': [], ## modrm.reg
183 'rm': [], ## modrm.rm
184 'imm': [], ## immediate instruction data
185 'vvvv': [], ## VEX.vvvv
186
187 # fixed registers.
188 'AL': [],
189 'rAX': [],
190 'rSI': [],
191 'rDI': [],
192 'rFLAGS': [],
193 'CS': [],
194 'DS': [],
195 'ES': [],
196 'FS': [],
197 'GS': [],
198 'SS': [],
199};
200
201## \@op[1-4] types
202##
203## Value fields:
204## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
205## - 1: the location (g_kdOpLocations).
206## - 2: disassembler format string version of the type.
207## - 3: disassembler OP_PARAM_XXX (XXX only).
208##
209## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Each value is a tuple of:
    #   [0] the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM),
    #   [1] the operand location (a key of g_kdOpLocations),
    #   [2] the disassembler format string version of the type,
    #   [3] the disassembler OP_PARAM_XXX name (the XXX part only).

    # Fixed addresses
    'Ap':       ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':       ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':       ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':       ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':      ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':      ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':      ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':      ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':      ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'WqZxReg':  ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':       ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':     ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),

    # ModR/M.rm - memory only.
    'Ma':       ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'MbRO':     ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'MdRO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'MdWO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':       ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MRO':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'MRW':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':       ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':       ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':       ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':      ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZxReg': ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':      ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':      ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':      ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':       ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    # Was '%Vpd'/'Vpd' (copy of the row above); now mirrors the 'Wdq' row.
    # NOTE(review): confirm OP_PARM_Vdq exists in the disassembler headers.
    'Vdq':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),

    # Immediate values.
    'Ib':       ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':       ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':       ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':       ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':       ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':       ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':       ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':       ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':       ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':       ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':       ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':       ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':       ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':       ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':       ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':       ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':      ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':       ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':       ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':       ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':       ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':       ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':       ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
285
286# IDX_ParseFixedReg
287# IDX_ParseVexDest
288
289
290## IEMFORM_XXX mappings.
291g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
292 'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
293 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
294 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
295 'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
296 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
297 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
298 'M': ( 'ModR/M', [ 'rm', ], ),
299 'M_REG': ( 'ModR/M', [ 'rm', ], ),
300 'M_MEM': ( 'ModR/M', [ 'rm', ], ),
301 'R': ( 'ModR/M', [ 'reg', ], ),
302
303 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
304 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
305 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
306 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
307 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
308 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
309 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
310 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
311 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
312 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
313 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
314 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
315
316 'FIXED': ( 'fixed', None, )
317};
318
319## \@oppfx values.
320g_kdPrefixes = {
321 'none': [],
322 '0x66': [],
323 '0xf3': [],
324 '0xf2': [],
325};
326
327## Special \@opcode tag values.
328g_kdSpecialOpcodes = {
329 '/reg': [],
330 'mr/reg': [],
331 '11 /reg': [],
332 '!11 /reg': [],
333 '11 mr/reg': [],
334 '!11 mr/reg': [],
335};
336
337## Special \@opcodesub tag values.
338g_kdSubOpcodes = {
339 'none': [ None, ],
340 '11 mr/reg': [ '11 mr/reg', ],
341 '11': [ '11 mr/reg', ], ##< alias
342 '!11 mr/reg': [ '!11 mr/reg', ],
343 '!11': [ '!11 mr/reg', ], ##< alias
344};
345
346## Valid values for \@openc
347g_kdEncodings = {
348 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
349 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
350 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
351 'prefix': [ None, ], ##< Prefix
352};
353
354## \@opunused, \@opinvalid, \@opinvlstyle
355g_kdInvalidStyles = {
356 'immediate': [], ##< CPU stops decoding immediately after the opcode.
357 'intel-modrm': [], ##< Intel decodes ModR/M.
358 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
359 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
360 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
361};
362
363g_kdCpuNames = {
364 '8086': (),
365 '80186': (),
366 '80286': (),
367 '80386': (),
368 '80486': (),
369};
370
371## \@opcpuid
g_kdCpuIdFlags = {
    # Maps the lower case feature mnemonic accepted by \@opcpuid to the name
    # of the corresponding X86_CPUID_XXX feature bit constant.
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three previously named unrelated feature bits (DTES64, CPLDS
    # and TM2 respectively).
    # NOTE(review): confirm the constant names against iprt/x86.h.
    'pclmul':     'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':    'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':      'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':      'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
415
416## \@ophints values.
417g_kdHints = {
418 'invalid': 'DISOPTYPE_INVALID', ##<
419 'harmless': 'DISOPTYPE_HARMLESS', ##<
420 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
421 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
422 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
423 'portio': 'DISOPTYPE_PORTIO', ##<
424 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
425 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
426 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
427 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
428 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
429 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
430 'illegal': 'DISOPTYPE_ILLEGAL', ##<
431 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
432 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
433 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss).
434 'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
435 'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
436 'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
437 'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
438 'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
439 'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
440 'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
441 'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
442 'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
443 ## (only in 16 & 32 bits mode!)
444 'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
445 'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
446 'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
447 'ignores_op_size': '', ##< Ignores both operand size prefixes.
448 'lock_allowed': '', ##< Lock prefix allowed.
449};
450
451## \@opxcpttype values (see SDMv2 2.4, 2.7).
452g_kdXcptTypes = {
453 'none': [],
454 '1': [],
455 '2': [],
456 '3': [],
457 '4': [],
458 '4UA': [],
459 '5': [],
460 '6': [],
461 '7': [],
462 '8': [],
463 '11': [],
464 '12': [],
465 'E1': [],
466 'E1NF': [],
467 'E2': [],
468 'E3': [],
469 'E3NF': [],
470 'E4': [],
471 'E4NF': [],
472 'E5': [],
473 'E5NF': [],
474 'E6': [],
475 'E6NF': [],
476 'E7NF': [],
477 'E9': [],
478 'E9NF': [],
479 'E10': [],
480 'E11': [],
481 'E12': [],
482 'E12NF': [],
483};
484
485
486def _isValidOpcodeByte(sOpcode):
487 """
488 Checks if sOpcode is a valid lower case opcode byte.
489 Returns true/false.
490 """
491 if len(sOpcode) == 4:
492 if sOpcode[:2] == '0x':
493 if sOpcode[2] in '0123456789abcdef':
494 if sOpcode[3] in '0123456789abcdef':
495 return True;
496 return False;
497
498
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the optional list of lead opcode
        bytes (each formatted like '0x0f'), sSelector a key of kdSelectors,
        sEncoding a key of kdEncodings, and sDisParse either None or a string
        starting with 'IDX_Parse'.  Invalid arguments trip assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        oInstr must provide getOpcodeByte() and, for the 'byte' selector, an
        sOpcode attribute holding the '0xNN' string.  For the ModR/M based
        selectors the opcode byte is treated as a ModR/M byte: mod is bits
        7:6, reg is bits 5:3 and rm is bits 2:0.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # The mod field is the two top bits, so "mod == 0y11" is 0xc0 after
        # masking with 0xc0 (comparing against 0xc can never match).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Those without an opcode are left out.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        sName: 'm' and 'r' words and two digit lower case hex words are kept
        with their underscore, all other words are appended capitalized with
        'grp' and 'map' turned into 'Grp' and 'Map'.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
639
640
class TestType(object):
    """
    Value type for test inputs and outputs.

    The base class handles integer like values written as decimal or '0x'
    prefixed hexadecimal text.  The fUnsigned constructor parameter gives the
    default stance on zero vs sign extending; a value prefixed with '+' or '-'
    always selects sign extension.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes (in bytes).
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Raised when a value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its 4-bit complement.
    kdHexInv = { '%x' % (iDigit,): '%x' % (15 - iDigit,) for iDigit in range(16) };

    @staticmethod
    def _toHexDigits(iNonNegative):
        """
        Returns the bare lower case hex digits of a non-negative integer, i.e.
        hex() output without the '0x' prefix and the python 2 'L' suffix.
        """
        sText = hex(iNonNegative);
        if sys.version_info[0] < 3:
            assert sText[-1] == 'L';
            sText = sText[:-1];
        assert sText[:2] == '0x';
        return sText[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the bytes
        in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts where the '0x' prefix sits.
        fSigned     = sValue[0] == '-' or sValue[0] == '+';
        fSignExtend = True if fSigned else not self.fUnsigned;
        if fSigned:
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Convert the text to a (long) integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  Negative values are done as two's complement
        # by inverting the digits of (-n - 1), making sure the top bit ends up set.
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Work out the number of digits to pad to: the smallest normal size
        # that fits, or else a multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad on the left with zeros, or with 'f's for negative values.
        sPadDigit = '0' if iValue >= 0 else 'f';
        sHex = (sPadDigit * (cNaturalDigits - cDigits)) + sHex;

        # Walk the digit pairs from the end so the least significant byte comes first.
        abValue = bytearray();
        for offEnd in range(len(sHex), 0, -2):
            abValue.append(int(sHex[offEnd - 2 : offEnd], 16));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
765
766
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of the mnemonics in g_kdEFlagsMnemonics.
    Mnemonics mapping to a '!' prefixed constant request that the flag be
    cleared, all others that it be set.
    """

    ## The mnemonics that stand for a cleared flag (the '!' entries of g_kdEFlagsMnemonics).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a flag mnemonic list into byte representation(s).

        Returns the single entry form of TestType.get() holding the flags to
        set when no flag is to be cleared.  Otherwise returns a pair where the
        first entry holds the flags to clear and the second the flags to set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether it names a flag to clear.

        Whole comma separated mnemonics are compared, matching how get()
        splits the value.  A substring search would misfire on 'iopl', which
        contains 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
802
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants.

    A value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in the dictionary; the
    values found are OR'ed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts a flag name list into the byte representation of the combined
        value, see TestType.get() for the return format.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined = fCombined | fBits;
        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
823
824
825class TestInOut(object):
826 """
827 One input or output state modifier.
828
829 This should be thought as values to modify BS3REGCTX and extended (needs
830 to be structured) state.
831 """
832 ## Assigned operators.
833 kasOperators = [
834 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
835 '&~=',
836 '&=',
837 '|=',
838 '='
839 ];
840 ## Types
841 kdTypes = {
842 'uint': TestType('uint', fUnsigned = True),
843 'int': TestType('int'),
844 'efl': TestTypeEflags('efl'),
845 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
846 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
847 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
848 };
849 ## CPU context fields.
850 kdFields = {
851 # name: ( default type, [both|input|output], )
852 # Operands.
853 'op1': ( 'uint', 'both', ), ## \@op1
854 'op2': ( 'uint', 'both', ), ## \@op2
855 'op3': ( 'uint', 'both', ), ## \@op3
856 'op4': ( 'uint', 'both', ), ## \@op4
857 # Flags.
858 'efl': ( 'efl', 'both', ),
859 'efl_undef': ( 'uint', 'output', ),
860 # 8-bit GPRs.
861 'al': ( 'uint', 'both', ),
862 'cl': ( 'uint', 'both', ),
863 'dl': ( 'uint', 'both', ),
864 'bl': ( 'uint', 'both', ),
865 'ah': ( 'uint', 'both', ),
866 'ch': ( 'uint', 'both', ),
867 'dh': ( 'uint', 'both', ),
868 'bh': ( 'uint', 'both', ),
869 'r8l': ( 'uint', 'both', ),
870 'r9l': ( 'uint', 'both', ),
871 'r10l': ( 'uint', 'both', ),
872 'r11l': ( 'uint', 'both', ),
873 'r12l': ( 'uint', 'both', ),
874 'r13l': ( 'uint', 'both', ),
875 'r14l': ( 'uint', 'both', ),
876 'r15l': ( 'uint', 'both', ),
877 # 16-bit GPRs.
878 'ax': ( 'uint', 'both', ),
879 'dx': ( 'uint', 'both', ),
880 'cx': ( 'uint', 'both', ),
881 'bx': ( 'uint', 'both', ),
882 'sp': ( 'uint', 'both', ),
883 'bp': ( 'uint', 'both', ),
884 'si': ( 'uint', 'both', ),
885 'di': ( 'uint', 'both', ),
886 'r8w': ( 'uint', 'both', ),
887 'r9w': ( 'uint', 'both', ),
888 'r10w': ( 'uint', 'both', ),
889 'r11w': ( 'uint', 'both', ),
890 'r12w': ( 'uint', 'both', ),
891 'r13w': ( 'uint', 'both', ),
892 'r14w': ( 'uint', 'both', ),
893 'r15w': ( 'uint', 'both', ),
894 # 32-bit GPRs.
895 'eax': ( 'uint', 'both', ),
896 'edx': ( 'uint', 'both', ),
897 'ecx': ( 'uint', 'both', ),
898 'ebx': ( 'uint', 'both', ),
899 'esp': ( 'uint', 'both', ),
900 'ebp': ( 'uint', 'both', ),
901 'esi': ( 'uint', 'both', ),
902 'edi': ( 'uint', 'both', ),
903 'r8d': ( 'uint', 'both', ),
904 'r9d': ( 'uint', 'both', ),
905 'r10d': ( 'uint', 'both', ),
906 'r11d': ( 'uint', 'both', ),
907 'r12d': ( 'uint', 'both', ),
908 'r13d': ( 'uint', 'both', ),
909 'r14d': ( 'uint', 'both', ),
910 'r15d': ( 'uint', 'both', ),
911 # 64-bit GPRs.
912 'rax': ( 'uint', 'both', ),
913 'rdx': ( 'uint', 'both', ),
914 'rcx': ( 'uint', 'both', ),
915 'rbx': ( 'uint', 'both', ),
916 'rsp': ( 'uint', 'both', ),
917 'rbp': ( 'uint', 'both', ),
918 'rsi': ( 'uint', 'both', ),
919 'rdi': ( 'uint', 'both', ),
920 'r8': ( 'uint', 'both', ),
921 'r9': ( 'uint', 'both', ),
922 'r10': ( 'uint', 'both', ),
923 'r11': ( 'uint', 'both', ),
924 'r12': ( 'uint', 'both', ),
925 'r13': ( 'uint', 'both', ),
926 'r14': ( 'uint', 'both', ),
927 'r15': ( 'uint', 'both', ),
928 # 16-bit, 32-bit or 64-bit registers according to operand size.
929 'oz.rax': ( 'uint', 'both', ),
930 'oz.rdx': ( 'uint', 'both', ),
931 'oz.rcx': ( 'uint', 'both', ),
932 'oz.rbx': ( 'uint', 'both', ),
933 'oz.rsp': ( 'uint', 'both', ),
934 'oz.rbp': ( 'uint', 'both', ),
935 'oz.rsi': ( 'uint', 'both', ),
936 'oz.rdi': ( 'uint', 'both', ),
937 'oz.r8': ( 'uint', 'both', ),
938 'oz.r9': ( 'uint', 'both', ),
939 'oz.r10': ( 'uint', 'both', ),
940 'oz.r11': ( 'uint', 'both', ),
941 'oz.r12': ( 'uint', 'both', ),
942 'oz.r13': ( 'uint', 'both', ),
943 'oz.r14': ( 'uint', 'both', ),
944 'oz.r15': ( 'uint', 'both', ),
945 # Control registers.
946 'cr0': ( 'cr0', 'both', ),
947 'cr4': ( 'cr4', 'both', ),
948 'xcr0': ( 'xcr0', 'both', ),
949 # FPU Registers
950 'fcw': ( 'uint', 'both', ),
951 'fsw': ( 'uint', 'both', ),
952 'ftw': ( 'uint', 'both', ),
953 'fop': ( 'uint', 'both', ),
954 'fpuip': ( 'uint', 'both', ),
955 'fpucs': ( 'uint', 'both', ),
956 'fpudp': ( 'uint', 'both', ),
957 'fpuds': ( 'uint', 'both', ),
958 'mxcsr': ( 'uint', 'both', ),
959 'st0': ( 'uint', 'both', ),
960 'st1': ( 'uint', 'both', ),
961 'st2': ( 'uint', 'both', ),
962 'st3': ( 'uint', 'both', ),
963 'st4': ( 'uint', 'both', ),
964 'st5': ( 'uint', 'both', ),
965 'st6': ( 'uint', 'both', ),
966 'st7': ( 'uint', 'both', ),
967 # MMX registers.
968 'mm0': ( 'uint', 'both', ),
969 'mm1': ( 'uint', 'both', ),
970 'mm2': ( 'uint', 'both', ),
971 'mm3': ( 'uint', 'both', ),
972 'mm4': ( 'uint', 'both', ),
973 'mm5': ( 'uint', 'both', ),
974 'mm6': ( 'uint', 'both', ),
975 'mm7': ( 'uint', 'both', ),
976 # SSE registers.
977 'xmm0': ( 'uint', 'both', ),
978 'xmm1': ( 'uint', 'both', ),
979 'xmm2': ( 'uint', 'both', ),
980 'xmm3': ( 'uint', 'both', ),
981 'xmm4': ( 'uint', 'both', ),
982 'xmm5': ( 'uint', 'both', ),
983 'xmm6': ( 'uint', 'both', ),
984 'xmm7': ( 'uint', 'both', ),
985 'xmm8': ( 'uint', 'both', ),
986 'xmm9': ( 'uint', 'both', ),
987 'xmm10': ( 'uint', 'both', ),
988 'xmm11': ( 'uint', 'both', ),
989 'xmm12': ( 'uint', 'both', ),
990 'xmm13': ( 'uint', 'both', ),
991 'xmm14': ( 'uint', 'both', ),
992 'xmm15': ( 'uint', 'both', ),
993 'xmm0.lo': ( 'uint', 'both', ),
994 'xmm1.lo': ( 'uint', 'both', ),
995 'xmm2.lo': ( 'uint', 'both', ),
996 'xmm3.lo': ( 'uint', 'both', ),
997 'xmm4.lo': ( 'uint', 'both', ),
998 'xmm5.lo': ( 'uint', 'both', ),
999 'xmm6.lo': ( 'uint', 'both', ),
1000 'xmm7.lo': ( 'uint', 'both', ),
1001 'xmm8.lo': ( 'uint', 'both', ),
1002 'xmm9.lo': ( 'uint', 'both', ),
1003 'xmm10.lo': ( 'uint', 'both', ),
1004 'xmm11.lo': ( 'uint', 'both', ),
1005 'xmm12.lo': ( 'uint', 'both', ),
1006 'xmm13.lo': ( 'uint', 'both', ),
1007 'xmm14.lo': ( 'uint', 'both', ),
1008 'xmm15.lo': ( 'uint', 'both', ),
1009 'xmm0.hi': ( 'uint', 'both', ),
1010 'xmm1.hi': ( 'uint', 'both', ),
1011 'xmm2.hi': ( 'uint', 'both', ),
1012 'xmm3.hi': ( 'uint', 'both', ),
1013 'xmm4.hi': ( 'uint', 'both', ),
1014 'xmm5.hi': ( 'uint', 'both', ),
1015 'xmm6.hi': ( 'uint', 'both', ),
1016 'xmm7.hi': ( 'uint', 'both', ),
1017 'xmm8.hi': ( 'uint', 'both', ),
1018 'xmm9.hi': ( 'uint', 'both', ),
1019 'xmm10.hi': ( 'uint', 'both', ),
1020 'xmm11.hi': ( 'uint', 'both', ),
1021 'xmm12.hi': ( 'uint', 'both', ),
1022 'xmm13.hi': ( 'uint', 'both', ),
1023 'xmm14.hi': ( 'uint', 'both', ),
1024 'xmm15.hi': ( 'uint', 'both', ),
1025 'xmm0.lo.zx': ( 'uint', 'both', ),
1026 'xmm1.lo.zx': ( 'uint', 'both', ),
1027 'xmm2.lo.zx': ( 'uint', 'both', ),
1028 'xmm3.lo.zx': ( 'uint', 'both', ),
1029 'xmm4.lo.zx': ( 'uint', 'both', ),
1030 'xmm5.lo.zx': ( 'uint', 'both', ),
1031 'xmm6.lo.zx': ( 'uint', 'both', ),
1032 'xmm7.lo.zx': ( 'uint', 'both', ),
1033 'xmm8.lo.zx': ( 'uint', 'both', ),
1034 'xmm9.lo.zx': ( 'uint', 'both', ),
1035 'xmm10.lo.zx': ( 'uint', 'both', ),
1036 'xmm11.lo.zx': ( 'uint', 'both', ),
1037 'xmm12.lo.zx': ( 'uint', 'both', ),
1038 'xmm13.lo.zx': ( 'uint', 'both', ),
1039 'xmm14.lo.zx': ( 'uint', 'both', ),
1040 'xmm15.lo.zx': ( 'uint', 'both', ),
1041 'xmm0.dw0': ( 'uint', 'both', ),
1042 'xmm1.dw0': ( 'uint', 'both', ),
1043 'xmm2.dw0': ( 'uint', 'both', ),
1044 'xmm3.dw0': ( 'uint', 'both', ),
1045 'xmm4.dw0': ( 'uint', 'both', ),
1046 'xmm5.dw0': ( 'uint', 'both', ),
1047 'xmm6.dw0': ( 'uint', 'both', ),
1048 'xmm7.dw0': ( 'uint', 'both', ),
1049 'xmm8.dw0': ( 'uint', 'both', ),
1050 'xmm9.dw0': ( 'uint', 'both', ),
1051 'xmm10.dw0': ( 'uint', 'both', ),
1052 'xmm11.dw0': ( 'uint', 'both', ),
1053 'xmm12.dw0': ( 'uint', 'both', ),
1054 'xmm13.dw0': ( 'uint', 'both', ),
1055 'xmm14.dw0': ( 'uint', 'both', ),
1056 'xmm15_dw0': ( 'uint', 'both', ),
1057 # AVX registers.
1058 'ymm0': ( 'uint', 'both', ),
1059 'ymm1': ( 'uint', 'both', ),
1060 'ymm2': ( 'uint', 'both', ),
1061 'ymm3': ( 'uint', 'both', ),
1062 'ymm4': ( 'uint', 'both', ),
1063 'ymm5': ( 'uint', 'both', ),
1064 'ymm6': ( 'uint', 'both', ),
1065 'ymm7': ( 'uint', 'both', ),
1066 'ymm8': ( 'uint', 'both', ),
1067 'ymm9': ( 'uint', 'both', ),
1068 'ymm10': ( 'uint', 'both', ),
1069 'ymm11': ( 'uint', 'both', ),
1070 'ymm12': ( 'uint', 'both', ),
1071 'ymm13': ( 'uint', 'both', ),
1072 'ymm14': ( 'uint', 'both', ),
1073 'ymm15': ( 'uint', 'both', ),
1074
1075 # Special ones.
1076 'value.xcpt': ( 'uint', 'output', ),
1077 };
1078
1079 def __init__(self, sField, sOp, sValue, sType):
1080 assert sField in self.kdFields;
1081 assert sOp in self.kasOperators;
1082 self.sField = sField;
1083 self.sOp = sOp;
1084 self.sValue = sValue;
1085 self.sType = sType;
1086 assert isinstance(sField, str);
1087 assert isinstance(sOp, str);
1088 assert isinstance(sType, str);
1089 assert isinstance(sValue, str);
1090
1091
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet that
    restricts when a test applies.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # CPU modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # Paging on/off.
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
        # CPU vendor.
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };
    ## Selector shorthand predicates.
    ## Each one translates into a variable expression.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Initializes the selector.

        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue one of the values valid for sVariable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1180
1181
1182class InstructionTest(object):
1183 """
1184 Instruction test.
1185 """
1186
1187 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1188 self.oInstr = oInstr; # type: InstructionTest
1189 self.aoInputs = []; # type: list(TestInOut)
1190 self.aoOutputs = []; # type: list(TestInOut)
1191 self.aoSelectors = []; # type: list(TestSelector)
1192
1193 def toString(self, fRepr = False):
1194 """
1195 Converts it to string representation.
1196 """
1197 asWords = [];
1198 if self.aoSelectors:
1199 for oSelector in self.aoSelectors:
1200 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1201 asWords.append('/');
1202
1203 for oModifier in self.aoInputs:
1204 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1205
1206 asWords.append('->');
1207
1208 for oModifier in self.aoOutputs:
1209 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1210
1211 if fRepr:
1212 return '<' + ' '.join(asWords) + '>';
1213 return ' '.join(asWords);
1214
1215 def __str__(self):
1216 """ Provide string represenation. """
1217 return self.toString(False);
1218
1219 def __repr__(self):
1220 """ Provide unambigious string representation. """
1221 return self.toString(True);
1222
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType;    ##< g_kdOpLocations key, g_kdOpTypes key.

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided by the first character of the operand type.
        chKind = self.sType[0];
        return chKind in ['E', 'G', 'M'];


1237
1238
1239
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain record of everything collected about one instruction: the core
    specification attributes, implementation attributes, decoding
    bookkeeping and the raw intermediate inputs.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile is the source file name and iLine the line number at which
        the instruction was created.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'field=value' pairs separated by '; ', skipping fields
        whose value is None.  With fRepr set the result is wrapped in angle
        brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Identity comparison with None; unset fields are left out entirely.
        sRet = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in aasFields if oValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms: '0xNN' (hex byte), '/r' (digit shifted into bits
        3 thru 5), '11/r' (same with 0xc0 or'ed in) and '!11/r' (same with
        0x80 or'ed in).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form: exactly two characters, a slash and one digit.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics to a constant name
        and then via g_kdX86EFlagsConstants to its value.  None or an empty
        list gives 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);
1413
1414
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps, indexed by map name.
## NOTE(review): grp1_80 now carries sSelector = '/r' like its sibling group 1
## maps (0x81, 0x82, 0x83) - confirm against InstructionMap and its users.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1486
1487
1488
class ParserException(Exception):
    """ Parser exception, carrying a single message string. """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1493
1494
1495class SimpleParser(object):
1496 """
1497 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1498 """
1499
1500 ## @name Parser state.
1501 ## @{
1502 kiCode = 0;
1503 kiCommentMulti = 1;
1504 ## @}
1505
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to
    parse and sDefaultMap the g_dInstructionMaps key of the map that
    instructions without an explicit map are added to.
    """
    # Input and current position/state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The default instruction map must exist.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation expressions.  Macro names, mnemonics and stats names all
    # share the same plain identifier pattern.
    oReIdentifier       = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMacroName   = oReIdentifier;
    self.oReMnemonic    = oReIdentifier;
    self.oReStatsName   = oReIdentifier;
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both the '@optestN' and the '@optest[N]' spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    self.asErrors = [];
1572
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is prefixed with the source
    file name and the current line number.
    """
    tArgs = (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException("%s:%d: error: %s" % tArgs);
1578
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1584
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1592
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1600
def printErrors(self):
    """
    Writes all recorded errors to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1609
def debug(self, sMessage):
    """
    For debugging: prints the message with a 'debug: ' prefix when
    self.fDebug is set, otherwise does nothing.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1616
1617
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and appends it to both the global
    instruction list and the list of current instructions.

    iLine is the creation line number; the current line is used when it
    is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oInstr);
    return oInstr;
1626
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string (underscore separated words).

    Nothing happens if the instruction already has a mnemonic.  Otherwise
    the lowercased first word becomes the mnemonic and, provided the
    instruction has no operands yet, each following word is looked up in
    g_kdOpTypes and added as an operand.

    Returns False when hitting an unknown operand type (operands added
    before that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asWords = sStats.split('_');
    oInstr.sMnemonic = asWords[0].lower();

    asTypes = asWords[1:];
    if not asTypes or oInstr.aoOperands:
        return True;

    for sType in asTypes:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1642
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in defaults for mnemonic,
    disassembler enum, stats name, function name and encoding where they
    are missing, adds the instruction to its maps and registers it in the
    stats and function indexed dictionaries.  Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # No processing specific to specified instructions (cOpTags > 0) or
    # to the unspecified legacy stuff yet.  For the latter we generally
    # only got a few things to go on:
    #       /** Opcode 0x0f 0x00 /0. */
    #       FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #

    def operandSuffix():
        """ '_<type>' for each operand with a non-empty type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats (or else the function name)
    # if the former is missing.
    if oInstr.sMnemonic is None:
        sStatsLike = oInstr.sStats;
        if sStatsLike is None and oInstr.sFunction is not None:
            sStatsLike = oInstr.sFunction.replace('iemOp_', '');
        if sStatsLike is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sStatsLike);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or else
    # from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandSuffix();

    # Derive the IEM function name from mnemonic and operand types, or
    # else from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.  Left unset when the first operand
    # does not use ModR/M.
    if oInstr.sEncoding is None:
        aoOperands = oInstr.aoOperands;
        if not aoOperands:
            oInstr.sEncoding = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
        elif aoOperands[0].usesModRM():
            fVex = len(aoOperands) >= 2 and aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVex else 'ModR/M';

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1745
def doneInstructions(self, iLineInComment = None):
    """
    Done with the current instruction(s).

    Completes each current instruction, using the current line as the
    completion line, or self.iCommentLine + iLineInComment when the latter
    is given.  Updates the stub and instruction totals and then resets the
    comment text and the current instruction list.  Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment     = '';
    self.aoCurInstrs  = [];
    return True;
1760
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes that are currently
    None get replaced.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1772
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    Arrays that are too short are first padded with None.  Unless
    fOverwrite is exactly True, only entries that are None get replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1784
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex value, the words after 'Opcode' are joined with single spaces and
    stored as the sRawOldOpcodes attribute of the current instructions
    (only where not already set).  An uppercase '0X' prefix is normalized
    to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1800
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there are no current instructions, one is added via addInstruction()
    for line self.iCommentLine + iTagLine.  The tag counter of every current
    instruction is bumped, and self.cTotalTagged is incremented for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCur in self.aoCurInstrs:
        fFirstTag = oCur.cOpTags == 0;
        oCur.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1810
@staticmethod
def flattenSections(aasSections):
    """
    Flattens each multiline section into one string.

    The lines of a section are stripped and joined with single spaces.
    Sections without any lines are dropped.

    Returns a list of strings, one per non-empty section.
    """
    return [ ' '.join([sLine.strip() for sLine in asLines])
             for asLines in aasSections
             if asLines ];
1822
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The lines of each section are stripped and joined using sLineSep, then
    the sections are joined using sSectionSep.  Sections without any lines
    are skipped and never contribute a separator, so the result has neither
    a leading nor a trailing section separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    # Joining only the non-empty sections (rather than testing the section
    # index) avoids a stray leading separator when the first section is empty.
    return sSectionSep.join([ sLineSep.join([sLine.strip() for sLine in asLines])
                              for asLines in aasSections
                              if asLines ]);
1840
1841
1842
1843 ## @name Tag parsers
1844 ## @{
1845
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value gets a full stop appended if it lacks one, may not be longer
    than 180 characters and must consist of a single sentence.  A brief that
    has already been set is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first full stop that is followed by a space or ends the
    # string; dots inside words (like '1.0') are skipped.  If that full stop
    # is not the last character, there is more than one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1875
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    Used for describing instructions.  The flattened sections are appended
    to the description sections of the instruction.  Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
1890
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The value must match self.oReMnemonic.  Because of prefixes, groups and
    whatnot, there are times when the mnemonic isn't that of an actual
    assembler mnemonic.  An already set mnemonic is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
1913
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location associated with the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that has already been specified is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # Note: this is about the type, not the location.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # Default location for the type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1963
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key of g_dInstructionMaps.  Whitespace around
    the names and empty list entries (as produced by multi-line values with
    trailing commas) are ignored.  Assigning a map twice is an error.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries must be filtered out
    # before checking whether any value was given at all.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed more than once in this very tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1998
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None, 'none' is stored as
    is, and a two character value gets '0x' prepended before being checked
    with _isValidOpcodeByte().  Anything but None must be a key of
    g_kdPrefixes.  An already set prefix is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Guard against indexing an empty list below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2036
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' part is a single digit in the range 0 thru 7, as the ModR/M
    reg field is three bits wide.  An already set opcode is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  The '/reg' style forms are simple
    # enough to check right here; everything else must be an opcode byte.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = False;
    for sLead in ('/', '11/', '!11/'):
        if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
            fValid = True;
            break;
    if not fValid and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2066
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    A simple way of dealing with encodings where mod=3 and mod!=3 represent
    exactly two different instructions.  The more proper way would be to go
    via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first element of the
    associated entry is what gets stored.  An already set sub-opcode is not
    overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sNewSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to replace an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
2092
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte().  An already set
    encoding is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown = sNewEncoding in g_kdEncodings or sNewEncoding in g_dInstructionMaps;
    if not fKnown and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));

    oInstr.sEncoding = sNewEncoding;
    return True;
2119
## Maps each EFLAGS tag to the name of the instruction attribute holding its
## flag list.
kdOpFlagToAttr = dict([
    ('@opfltest',    'asFlTest'),
    ('@opflmodify',  'asFlModify'),
    ('@opflundef',   'asFlUndefined'),
    ('@opflset',     'asFlSet'),
    ('@opflclear',   'asFlClear'),
]);
2128
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flag names found in
    g_kdEFlagsMnemonics, or the single word 'none' for an empty list.  The
    result is stored in the instruction attribute self.kdOpFlagToAttr
    associates with the tag, provided that attribute is still None.

    Returns True on success, False or the errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value, then check each of the flags.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag, sRaw in enumerate(asFlags):
            if sRaw in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up.
            sTrimmed = sRaw.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sTrimmed;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRaw,));
        if not fOkay:
            return False;

    # Store the list unless the attribute has been set already.
    asPrevious = getattr(oInstr, self.kdOpFlagToAttr[sTag]);
    if asPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
2160
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' gives an
    empty list.  Valid hints are added to oInstr.dHints; duplicates are
    reported via errorComment() but do not fail the tag.

    Returns True on success, False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Update the list entry (strings cannot be assigned into).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2194
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    Anything but exactly one word is reported via errorComment().  With no
    words at all the tag fails; with several, the first one is used.  The
    word must match self.oReDisEnum and an already set enum value is not
    overwritten.

    Returns True on success, False or the errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    sNewDisEnum = asWords[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an existing value.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));

    oInstr.sDisEnum = sNewDisEnum;
    return True;
2223
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  When more than one word is
    given this is reported and the first word is used.  An attempt to change
    an already set minimum CPU to a different value is reported and the
    existing value is kept.

    Returns True unless the value is missing or invalid, in which case the
    errorComment() result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Guard against indexing an empty list below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned during validation, or the overwrite
    # check below would never trigger.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2253
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list in which each entry must be
    a key of g_kdCpuIdFlags.  The single word 'none' gives an empty list.
    Valid entries are appended to oInstr.asCpuIds; duplicates are reported
    via errorComment() but do not fail the tag.

    Returns True on success, False if any of the entries are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Update the list entry (strings cannot be assigned into).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2287
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is required.  An already
    set group is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value; there shall be exactly one word.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));
    (sNewGroup,) = asWords;
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing value.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
2313
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused tag indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid tag indicates the specification is for a currently
    invalid instruction encoding (like UD2).

    The @opinvlstyle tag just indicates how CPUs decode the instruction
    when not supported (@opcpuid, @opmincpu) or disabled.

    Exactly one word that is a key of g_kdInvalidStyles is required, and
    only one of these tags may be given per instruction.  Besides storing
    the style, @opunused sets fUnused and @opinvalid sets fInvalid.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value; there shall be exactly one word.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    (sNewStyle,) = asWords;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an existing value.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2351
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  Tests that
    parse cleanly are appended to oInstr.aoTests, problems are reported via
    errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();

        # The words are processed back to front: outputs first, then the
        # inputs after seeing '->', and finally selectors after seeing '/'.
        # Identity comparison must be used when checking which list is the
        # current one, as the lists may well compare equal by value (both
        # empty, for instance).
        for sWord in reversed(asWords):
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands into full <variable><op><value> form.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the default type of the field is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;  # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we have got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2496
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is extracted from the tag and compared with the number of
    tests the instruction already has; a mismatch is reported via
    errorComment().  The value is then handed on to parseTagOpTest(), whose
    result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Bracketed form: digits between '@optest[' and ']'; plain form: digits
    # following '@optest'.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2512
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    Simple trick for ignoring a test while debugging another one: nothing is
    parsed or recorded and True is returned.

    See also @oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2524
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references not yet present are appended to oInstr.asCopyTests;
    invalid and duplicate references are reported via errorComment() without
    failing the tag.

    Returns True, unless there is no value at all, in which case the
    errorComment() result is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.  The references are only recorded
    # here, not resolved, so forward references are no problem.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
        elif self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2553
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to the global
    g_aoOnlyTestInstructions list unless it is already in it.

    Returns True on success, otherwise the errorComment() result.

    See also @optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There shall be no value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction, avoiding duplicates.
    fRegistered = oInstr in g_aoOnlyTestInstructions;
    if not fRegistered:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2576
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word that is a key of g_kdXcptTypes is required.  An already
    set exception type is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value; there shall be exactly one word.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));
    (sNewType,) = asWords;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s  (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an existing value.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));

    oInstr.sXcptType = sNewType;
    return True;
2603
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally this is picked up from
    the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The value must match self.oReFunctionName and an already set function
    name is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the function name pattern.
    sNewFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewFunction) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewFunction, self.oReFunctionName.pattern));

    # Refuse to replace an existing value.
    sOldFunction = oInstr.sFunction;
    if sOldFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sOldFunction, sNewFunction,));

    oInstr.sFunction = sNewFunction;
    return True;
2631
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally this is picked up from
    the IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The value must match self.oReStatsName and an already set statistics
    name is not overwritten.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the statistics name pattern.
    sNewStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewStats) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Refuse to replace an existing value.
    sOldStats = oInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sOldStats, sNewStats,));

    oInstr.sStats = sNewStats;
    return True;
2659
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions specified so far by calling
    doneInstructions() and returning its result.

    The tag takes no value; any text following it is reported via
    errorComment() (whose return value is then returned) and nothing is
    flushed.
    """
    _ = iEndLine;   # Unused; every tag handler is invoked with the same four arguments.

    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2672
2673 ## @}
2674
2675
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, leading asterisks/blanks are stripped,
    and the text is divided into '@tag' delimited chunks.  Each chunk is a
    list of sections (paragraphs separated by blank lines), each section a
    list of lines.  Chunks whose tag is found in self.dTagHandlers are
    passed to the handler as (sTag, aasSections, iTagLine, iEndLine).
    Text preceding the first tag is collected under the pseudo tag
    '@default'.

    An old style comment (first line starting with 'Opcode ') is first
    handed to parseCommentOldOpcode().

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore ignored, True otherwise.  Problems are reported thru
    errorComment().
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.  (The check above
    # guarantees that at least one non-empty line remains.)
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while asLines and not asLines[0]:
        self.iCommentLine += 1;     # Keep the line number in sync with asLines[0].
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Tag processing worker shared by the loop and the final tag, so both
    # get exactly the same handling and diagnostics.  State is kept in a
    # dictionary since the closure cannot rebind locals in Python 2.
    #
    dState = { 'cOpTags': 0, 'sFlatDefault': None, };

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """ Dispatches one completed tag chunk or complains about it. """
        # Drop the empty section a trailing blank line leaves behind.
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            dState['cOpTags'] += 1;
        elif sTag in ['@encoding', '@opencoding']:
            # Must precede the generic '@op' check or '@opencoding' never gets the hint.
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            dState['sFlatDefault'] = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));

    #
    # Look for @op* tagged data.
    #
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # Blank line: start a new section, but never stack empty ones.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            # Process the previous tag.
            processTag(sCurTag, aasSections, iCurTagLine, iLine);

            # New tag; anything following it on the line starts its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag (iLine is the index of the last comment line here).
    #
    processTag(sCurTag, aasSections, iCurTagLine, iLine);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if dState['cOpTags'] > 0 and dState['sFlatDefault']:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (dState['sFlatDefault'],));

    return True;
2776
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing
    parenthesis is not found in it, following lines are taken from
    self.asLines starting at self.iLine (neither is modified).

    Commas and parentheses only count as argument separators and nesting
    when they are outside C string and character literals, so an argument
    like "mov Eb,Gb" is kept as one argument (quotes included).

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  If the invocation is malformed the problem
    is reported via self.error() and the second element is None, which is
    what the findAndParse* callers treat as 'not found'.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # Callers unpack the result, so always hand back a tuple.
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;                # The quote character when inside a literal, otherwise None.
    while cDepth > 0:
        if off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
            continue;               # Re-check the length, the line could have been empty.
        ch = sInvocation[off];

        if chQuote is not None:
            if ch == '\\':
                off += 1;           # Skip the escaped character (could be the quote).
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2818
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of the macro name is considered, and it only
    counts as an invocation if the next non-blank character is an opening
    parenthesis.  This keeps e.g. 'IEMOP_MNEMONIC' from matching
    'IEMOP_MNEMONIC2EX(...)', and a name followed by nothing at all is
    simply 'not found'.

    Returns (len(sCode), None) if not found.  If found, returns the
    parseMacroInvocation result with the offset adjusted to be relative to
    the start of sCode.
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than [0] so a name at the very end of sCode doesn't raise IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2828
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if sMacro isn't invoked in sCode, otherwise the argument
    list as per parseMacroInvocation (macro name first).
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2834
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in the given order, stopping at the first
    one that is invoked in sCode.

    Returns same as findAndParseMacroInvocation, i.e. the argument list of
    the first hit or None if none of the macros are invoked.
    """
    # Generators keep this lazy: no further lookups are done after the first hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asHit for asHit in oHits if asHit is not None), None);
2844
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what the preceding
    @op* tags specified for the last of the current instructions, and
    fill in whatever the tags left unspecified (mnemonic, operands,
    encoding, statistics name, hints).

    Parameters:
        sMacro      - The macro name, only used in error messages.
        sStats      - The a_Stats argument (statistics base name).
        sAsm        - The a_szMnemonic argument; currently unused.
        sForm       - The a_Form argument, a key into g_kdIemForms.
        sUpper      - The a_Upper argument (upper cased mnemonic).
        sLower      - The a_Lower argument (lower cased mnemonic).
        sDisHints   - The a_fDisHints argument, '|' separated DISOPTYPE_XXX values or 0.
        sIemHints   - The a_fIemHints argument, '|' separated IEMOPHINT_XXX values or 0.
        asOperands  - List of a_OpN arguments, keys into g_kdOpTypes.

    Returns True.  Problems are reported via self.error() without
    aborting the processing.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        sFormEncoding = g_kdIemForms[sForm][0];
        asFormWheres  = g_kdIemForms[sForm][1];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = sFormEncoding;
        elif sFormEncoding != oInstr.sEncoding:
            # Report the encoding that was compared, not the whole form table entry.
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, sFormEncoding, sForm));

        # Check the parameter locations for the encoding.
        if asFormWheres is not None:
            if len(asFormWheres) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler and IEM hints only differ in prefix and parameter name.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
2962
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the a_Stats and a_szMnemonic arguments of the EX variants,
    so both are composed here from the lower case mnemonic and the operands
    ('mnemonic_op1_op2' and 'mnemonic op1,op2' respectively, or just the
    mnemonic when there are no operands) before handing everything on to
    workerIemOpMnemonicEx, whose result is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = '_'.join([sLower,] + list(asOperands));
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2972
def checkCodeForMacro(self, sCode):
    """
    Checks a piece of code (outside comments) for relevant macro invocations.

    Two kinds of macros are of interest:
        - The FNIEMOP_XXX function definition / stub macros, which supply
          the function name and stub status of the current instruction(s).
          Stub macros also complete the current instruction(s).
        - The IEMOP_MNEMONIC family, which supply statistics name,
          mnemonic, operands, form and hints.

    Returns True if a FNIEMOP_XXX macro was processed, otherwise False.
    """
    # No parenthesis (or only one in the very first position), no macro invocation.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sMacroName = asArgs[0];
        sFunction  = asArgs[1];
        fIsStub    = sMacroName.find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacroName, oInstr,) );
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacroName.find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    # Only applied when there is exactly one current instruction.
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    # for N = 0..4.  The operands sit between a_Lower and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[6 + cOperands], asArgs[7 + cOperands], asArgs[6 : 6 + cOperands]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints) for N = 0..4.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[4 + cOperands], asArgs[5 + cOperands], asArgs[4 : 4 + cOperands]);

    return False;
3066
3067
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are scanned starting at self.iLine with a
    small two state machine (self.iState is either self.kiCode or
    self.kiCommentMulti).  Multi-line comments are accumulated in
    self.sComment and handed to parseComment() when they end, while code
    outside comments goes to checkCodeForMacro().  A line with '}' in the
    first position completes the current instruction(s).

    Returns number or errors (whatever printErrors() returns).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.checkCodeForMacro(sLine[offLine:offHit]);
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        self.checkCodeForMacro(sLine[offLine:]);
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than [0] so that an empty line string is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Flush whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
3131
3132
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specfications.

    The whole file is read into a list of lines which is then run thru a
    SimpleParser instance created with sDefaultMap.

    Returns the error count from SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A
    ParserException from the parser is printed before being passed on,
    any other exception is passed on as-is.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oSrcFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrcFile:
        try:
            asLines = oSrcFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3163
3164
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each entry in an instruction's asCopyTests list names a source, which
    is looked up first in g_dAllInstructionsByStat and then in
    g_dAllInstructionsByFunction.  The tests of every source instruction
    found are appended to the aoTests list of the referencing instruction.

    References that cannot be resolved, or that resolve to the referencing
    instruction itself, are errors.  The errors are written to stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # Statistics names take precedence over function names.
            oByStat     = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3192
3193
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is not empty, the aoTests list of every other
    instruction in g_aoAllInstructions is replaced by an empty one so that
    only the listed instructions get tested.  Nothing is touched when the
    list is empty.

    Returns 0 (there are no error conditions).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3205
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  After
    parsing, the test copying and only-test restrictions are applied.

    Returns True on success.  If any errors were counted, the process is
    terminated with exit code 1 via sys.exit() rather than by raising a
    regular exception.
    """
    # (default instruction map, file name) pairs, parsed in this order.
    atSources = [
        ( 'one',    'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',  'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    for sDefaultMap, sName in atSources:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
3226
3227
3228
# Parse the instruction source files as soon as this module is loaded, i.e.
# both when it is imported and when it is run as a script.  Note that
# __parseAll() terminates the process with exit code 1 if any errors were
# counted.
__parseAll();
3230
3231
3232#
3233# Generators (may perhaps move later).
3234#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    For an instruction map in g_dInstructionMaps a 'const DISOPCODE xxx[]'
    C table is produced with one OP()/OPVEX() entry per instruction, in the
    order given by the map's getInstructionsInTableOrder() method, followed
    by an AssertCompile() on the table size.  The text is written to
    oDstFile, which only needs a write() method.

    Note! Only the first map in sort order is currently emitted, see the
          unconditional break at the end of the loop.
    Note! The oDstFile default is bound when the function is defined, so a
          later reassignment of sys.stdout is not picked up.
    """

    # Maps are visited sorted by encoding followed by the lead opcode bytes.
    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # The offset each column is padded out to when formatting a table line.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start each group of 16 entries with a comment giving the high nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Several instructions share this table slot; not handled yet, emit a placeholder.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # More than three operands requires the OPVEX macro with its extra operand slot.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    # Element 2 of the g_kdOpTypes entry is used as the operand text here.
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                iStart = len(asColumns);    # Index of the first decoder column.
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    # One zero decoder column per operand (oOperand itself is not used).
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    # The encoding names another instruction map, use that map's parser.
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Skips as many operands as decoder columns were added above.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to the number of operand slots of the macro.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)     # Now the index of the first operand parameter column.
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints mapping to DISOPTYPE_XXX defines are included, OR'ed
                # together in sorted hint name order.  The closing parenthesis of
                # the macro invocation is part of this column.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its offset in aoffColumns, or
                # separated by a single space if the line is already past it.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3398
# When run as a script, write the generated disassembler table to stdout
# (the generateDisassemblerTables default destination).
if __name__ == '__main__':
    generateDisassemblerTables();
3401
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette