VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66609

Last change on this file since 66609 was 66479, checked in by vboxsync, 8 years ago

IEM: Stubbed VEX opcode map 2 and 3.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 140.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66479 2017-04-07 15:55:21Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66479 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
55g_kdX86EFlagsConstants = {
56 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
57 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
58 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
59 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
60 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
61 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
62 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
63 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
64 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
65 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
66 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
67 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
68 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
69 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
70 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
71 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
72 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
73 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
74 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
75 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
76};
77
78## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
79g_kdEFlagsMnemonics = {
80 # Debugger flag notation (sorted by value):
81 'cf': 'X86_EFL_CF', ##< Carry Flag.
82 'nc': '!X86_EFL_CF', ##< No Carry.
83
84 'po': 'X86_EFL_PF', ##< Parity Odd.
85 'pe': '!X86_EFL_PF', ##< Parity Even.
86
87 'af': 'X86_EFL_AF', ##< Aux Flag.
88 'na': '!X86_EFL_AF', ##< No Aux.
89
90 'zr': 'X86_EFL_ZF', ##< ZeRo.
91 'nz': '!X86_EFL_ZF', ##< No Zero.
92
93 'ng': 'X86_EFL_SF', ##< NeGative (sign).
94 'pl': '!X86_EFL_SF', ##< PLus (sign).
95
96 'tf': 'X86_EFL_TF', ##< Trap flag.
97
98 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
99 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
100
101 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
102 'up': '!X86_EFL_DF', ##< UP (string op direction).
103
104 'ov': 'X86_EFL_OF', ##< OVerflow.
105 'nv': '!X86_EFL_OF', ##< No Overflow.
106
107 'nt': 'X86_EFL_NT', ##< Nested Task.
108 'rf': 'X86_EFL_RF', ##< Resume Flag.
109 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
110 'ac': 'X86_EFL_AC', ##< Alignment Check.
111 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
112 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
113
114 # Reference manual notation not covered above (sorted by value):
115 'pf': 'X86_EFL_PF',
116 'zf': 'X86_EFL_ZF',
117 'sf': 'X86_EFL_SF',
118 'if': 'X86_EFL_IF',
119 'df': 'X86_EFL_DF',
120 'of': 'X86_EFL_OF',
121 'iopl': 'X86_EFL_IOPL',
122 'id': 'X86_EFL_ID',
123};
124
125## Constants and values for CR0.
126g_kdX86Cr0Constants = {
127 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
128 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
129 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
130 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
131 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
132 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
133 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
134 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
135 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
136 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
137 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
138};
139
140## Constants and values for CR4.
141g_kdX86Cr4Constants = {
142 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
143 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
144 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
145 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
146 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
147 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
148 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
149 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
150 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
151 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
152 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
153 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
154 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
155 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
156 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
157 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
158 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
159 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
160};
161
162## XSAVE components (XCR0).
163g_kdX86XSaveCConstants = {
164 'XSAVE_C_X87': 0x00000001,
165 'XSAVE_C_SSE': 0x00000002,
166 'XSAVE_C_YMM': 0x00000004,
167 'XSAVE_C_BNDREGS': 0x00000008,
168 'XSAVE_C_BNDCSR': 0x00000010,
169 'XSAVE_C_OPMASK': 0x00000020,
170 'XSAVE_C_ZMM_HI256': 0x00000040,
171 'XSAVE_C_ZMM_16HI': 0x00000080,
172 'XSAVE_C_PKRU': 0x00000200,
173 'XSAVE_C_LWP': 0x4000000000000000,
174 'XSAVE_C_X': 0x8000000000000000,
175 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
176 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
177};
178
179
180## \@op[1-4] locations
181g_kdOpLocations = {
182 'reg': [], ## modrm.reg
183 'rm': [], ## modrm.rm
184 'imm': [], ## immediate instruction data
185 'vvvv': [], ## VEX.vvvv
186
187 # fixed registers.
188 'AL': [],
189 'rAX': [],
190 'rSI': [],
191 'rDI': [],
192 'rFLAGS': [],
193 'CS': [],
194 'DS': [],
195 'ES': [],
196 'FS': [],
197 'GS': [],
198 'SS': [],
199};
200
201## \@op[1-4] types
202##
203## Value fields:
204## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
205## - 1: the location (g_kdOpLocations).
206## - 2: disassembler format string version of the type.
207## - 3: disassembler OP_PARAM_XXX (XXX only).
208##
209## Note! See the A.2.1 in SDM vol 2 for the type names.
210g_kdOpTypes = {
211 # Fixed addresses
212 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),
213
214 # ModR/M.rm
215 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
216 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
217 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
218 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
219 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
220 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
221 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
222 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
223 'WqZxReg': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
224
225 # ModR/M.rm - register only.
226 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
227 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
228
229 # ModR/M.rm - memory only.
230 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
231 'MbRO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
232 'MdRO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
233 'MdWO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
234 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
235 'MRO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
236 'MRW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
237
238 # ModR/M.reg
239 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
240 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
241 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
242 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
243 'VssZxReg': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
244 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
245 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
246 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
247 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
248 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
249
250 # Immediate values.
251 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
252 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
253 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
254 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
255 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
256 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword
257
258 # Address operands (no ModR/M).
259 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
260 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),
261
262 # Relative jump targets
263 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
264 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),
265
266 # DS:rSI
267 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
268 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
269 # ES:rDI
270 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
271 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),
272
273 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),
274
275 # Fixed registers.
276 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
277 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
278 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
279 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
280 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
281 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
282 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
283 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
284};
285
286# IDX_ParseFixedReg
287# IDX_ParseVexDest
288
289
290## IEMFORM_XXX mappings.
291g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
292 'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
293 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
294 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
295 'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
296 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
297 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
298 'M': ( 'ModR/M', [ 'rm', ], ),
299 'M_REG': ( 'ModR/M', [ 'rm', ], ),
300 'M_MEM': ( 'ModR/M', [ 'rm', ], ),
301 'R': ( 'ModR/M', [ 'reg', ], ),
302
303 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
304 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
305 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
306 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
307 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
308 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
309 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
310 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
311 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
312 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
313 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
314 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
315
316 'FIXED': ( 'fixed', None, )
317};
318
319## \@oppfx values.
320g_kdPrefixes = {
321 'none': [],
322 '0x66': [],
323 '0xf3': [],
324 '0xf2': [],
325};
326
327## Special \@opcode tag values.
328g_kdSpecialOpcodes = {
329 '/reg': [],
330 'mr/reg': [],
331 '11 /reg': [],
332 '!11 /reg': [],
333 '11 mr/reg': [],
334 '!11 mr/reg': [],
335};
336
337## Special \@opcodesub tag values.
338g_kdSubOpcodes = {
339 'none': [ None, ],
340 '11 mr/reg': [ '11 mr/reg', ],
341 '11': [ '11 mr/reg', ], ##< alias
342 '!11 mr/reg': [ '!11 mr/reg', ],
343 '!11': [ '!11 mr/reg', ], ##< alias
344};
345
346## Valid values for \@openc
347g_kdEncodings = {
348 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
349 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
350 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
351 'prefix': [ None, ], ##< Prefix
352};
353
354## \@opunused, \@opinvalid, \@opinvlstyle
355g_kdInvalidStyles = {
356 'immediate': [], ##< CPU stops decoding immediately after the opcode.
357 'intel-modrm': [], ##< Intel decodes ModR/M.
358 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
359 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
360 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
361};
362
363g_kdCpuNames = {
364 '8086': (),
365 '80186': (),
366 '80286': (),
367 '80386': (),
368 '80486': (),
369};
370
371## \@opcpuid
# Maps each accepted \@opcpuid tag value to the name of a CPUID feature constant.
# NOTE(review): 'pclmul', 'monitor' and 'smx' previously pointed at the
# constants of neighbouring feature bits (DTES64, CPLDS and TM2); they now name
# the constant matching the feature itself - confirm against the x86 header.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
415
416## \@ophints values.
417g_kdHints = {
418 'invalid': 'DISOPTYPE_INVALID', ##<
419 'harmless': 'DISOPTYPE_HARMLESS', ##<
420 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
421 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
422 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
423 'portio': 'DISOPTYPE_PORTIO', ##<
424 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
425 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
426 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
427 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
428 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
429 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
430 'illegal': 'DISOPTYPE_ILLEGAL', ##<
431 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
432 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
433 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
434 'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
435 'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
436 'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
437 'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
438 'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
439 'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
440 'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
441 'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
442 'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
443 ## (only in 16 & 32 bits mode!)
444 'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
445 'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
446 'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
447 'ignores_op_size': '', ##< Ignores both operand size prefixes.
448 'lock_allowed': '', ##< Lock prefix allowed.
449};
450
451## \@opxcpttype values (see SDMv2 2.4, 2.7).
452g_kdXcptTypes = {
453 'none': [],
454 '1': [],
455 '2': [],
456 '3': [],
457 '4': [],
458 '4UA': [],
459 '5': [],
460 '6': [],
461 '7': [],
462 '8': [],
463 '11': [],
464 '12': [],
465 'E1': [],
466 'E1NF': [],
467 'E2': [],
468 'E3': [],
469 'E3NF': [],
470 'E4': [],
471 'E4NF': [],
472 'E5': [],
473 'E5NF': [],
474 'E6': [],
475 'E6NF': [],
476 'E7NF': [],
477 'E9': [],
478 'E9NF': [],
479 'E10': [],
480 'E11': [],
481 'E12': [],
482 'E12NF': [],
483};
484
485
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hexadecimal digits (e.g. '0x0f').
    Returns True if it is, False if not.
    """
    # Anything that is not four characters starting with '0x' is rejected up front.
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    sLowerHexDigits = '0123456789abcdef';
    return sOpcode[2] in sLowerHexDigits and sOpcode[3] in sLowerHexDigits;
497
498
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [],     ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [],     ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [],     ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [],     ##< XOP prefix with vvvvv = 8
        'xop9':     [],     ##< XOP prefix with vvvvv = 9
        'xop10':    [],     ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':         [ 256, ],   ##< next opcode byte selects the instruction (default).
        '/r':           [   8, ],   ##< modrm.reg selects the instruction.
        'memreg /r':    [  16, ],   ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':       [  32, ],   ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':       [   8, ],   ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':        [   8, ],   ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':           [  64, ],   ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case hex strings (None means no lead bytes), sSelector a key of
        kdSelectors, sEncoding a key of kdEncodings and sDisParse either None
        or a string starting with 'IDX_Parse'.  Invalid input trips an assertion.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        For the 'byte' selector this is the opcode byte itself.  For all other
        selectors the value returned by oInstr.getOpcodeByte() is decomposed
        like a ModR/M byte: bits 7:6 are mod, bits 5:3 are reg, bits 2:0 are rm.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors need masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            # Index bit 3 is set for the register form (mod == 3).
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # The mod field occupies bits 7:6, so mod == 0y11 means the masked
        # value equals 0xc0 (comparing against 0xc could never match).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions, skipping those without an opcode.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: promote the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' words and two digit lower case hex words keep their
        underscore, all other words are capitalized and appended directly.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
639
640
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the lower case hex digits of the non-negative integer iValue,
        without the '0x' prefix and without the 'L' suffix python 2 appends
        to long values.
        """
        sHex = hex(iValue);
        assert sHex[:2] == '0x';
        return sHex[2:].rstrip('L');

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        implementation always returns the single entry form; the bytearray
        holds the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer (int is arbitrary precision on python 3
        # and promotes to long automatically on python 2).
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            # Two's complement: invert each hex digit of (-n - 1).
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # Make sure the top bit is set so the value stays negative when sign extended.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits, or round up to a multiple
        # of the largest normal size when the value is bigger than that.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;
            assert isinstance(cNaturalDigits, int)

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base type.
        """
        _ = sValue;
        return False;
765
766
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated mnemonics from g_kdEFlagsMnemonics.
    """

    ## Mnemonics meaning 'flag clear', i.e. the ones mapping to a '!' prefixed constant.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into byte representations.

        Returns (clear-mask, set-mask) entries when at least one 'flag clear'
        mnemonic is present, otherwise just the single set-mask entry.
        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any 'flag clear' mnemonic and get() thus
        returns an AND+OR pair.

        Whole comma separated tokens are compared, the same way get() parses
        the value; a substring search would wrongly match 'pl' inside 'iopl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
802
class TestTypeFromDict(TestType):
    """
    Flag list value parsing driven by a dictionary of constants.

    Each comma separated flag name is upper cased, prefixed with
    sConstantPrefix and looked up in kdConstantsAndValues; the values found
    are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Returns the byte representation of the ORed flag values.
        Raises BadValue if a flag name is not in the dictionary.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined = fCombined | fBits;
        sHexValue = '0x%x' % (fCombined,);
        return TestType.get(self, sHexValue);
823
824
825class TestInOut(object):
826 """
827 One input or output state modifier.
828
829 This should be thought as values to modify BS3REGCTX and extended (needs
830 to be structured) state.
831 """
832 ## Assigned operators.
833 kasOperators = [
834 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
835 '&~=',
836 '&=',
837 '|=',
838 '='
839 ];
840 ## Types
841 kdTypes = {
842 'uint': TestType('uint', fUnsigned = True),
843 'int': TestType('int'),
844 'efl': TestTypeEflags('efl'),
845 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
846 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
847 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
848 };
849 ## CPU context fields.
850 kdFields = {
851 # name: ( default type, [both|input|output], )
852 # Operands.
853 'op1': ( 'uint', 'both', ), ## \@op1
854 'op2': ( 'uint', 'both', ), ## \@op2
855 'op3': ( 'uint', 'both', ), ## \@op3
856 'op4': ( 'uint', 'both', ), ## \@op4
857 # Flags.
858 'efl': ( 'efl', 'both', ),
859 'efl_undef': ( 'uint', 'output', ),
860 # 8-bit GPRs.
861 'al': ( 'uint', 'both', ),
862 'cl': ( 'uint', 'both', ),
863 'dl': ( 'uint', 'both', ),
864 'bl': ( 'uint', 'both', ),
865 'ah': ( 'uint', 'both', ),
866 'ch': ( 'uint', 'both', ),
867 'dh': ( 'uint', 'both', ),
868 'bh': ( 'uint', 'both', ),
869 'r8l': ( 'uint', 'both', ),
870 'r9l': ( 'uint', 'both', ),
871 'r10l': ( 'uint', 'both', ),
872 'r11l': ( 'uint', 'both', ),
873 'r12l': ( 'uint', 'both', ),
874 'r13l': ( 'uint', 'both', ),
875 'r14l': ( 'uint', 'both', ),
876 'r15l': ( 'uint', 'both', ),
877 # 16-bit GPRs.
878 'ax': ( 'uint', 'both', ),
879 'dx': ( 'uint', 'both', ),
880 'cx': ( 'uint', 'both', ),
881 'bx': ( 'uint', 'both', ),
882 'sp': ( 'uint', 'both', ),
883 'bp': ( 'uint', 'both', ),
884 'si': ( 'uint', 'both', ),
885 'di': ( 'uint', 'both', ),
886 'r8w': ( 'uint', 'both', ),
887 'r9w': ( 'uint', 'both', ),
888 'r10w': ( 'uint', 'both', ),
889 'r11w': ( 'uint', 'both', ),
890 'r12w': ( 'uint', 'both', ),
891 'r13w': ( 'uint', 'both', ),
892 'r14w': ( 'uint', 'both', ),
893 'r15w': ( 'uint', 'both', ),
894 # 32-bit GPRs.
895 'eax': ( 'uint', 'both', ),
896 'edx': ( 'uint', 'both', ),
897 'ecx': ( 'uint', 'both', ),
898 'ebx': ( 'uint', 'both', ),
899 'esp': ( 'uint', 'both', ),
900 'ebp': ( 'uint', 'both', ),
901 'esi': ( 'uint', 'both', ),
902 'edi': ( 'uint', 'both', ),
903 'r8d': ( 'uint', 'both', ),
904 'r9d': ( 'uint', 'both', ),
905 'r10d': ( 'uint', 'both', ),
906 'r11d': ( 'uint', 'both', ),
907 'r12d': ( 'uint', 'both', ),
908 'r13d': ( 'uint', 'both', ),
909 'r14d': ( 'uint', 'both', ),
910 'r15d': ( 'uint', 'both', ),
911 # 64-bit GPRs.
912 'rax': ( 'uint', 'both', ),
913 'rdx': ( 'uint', 'both', ),
914 'rcx': ( 'uint', 'both', ),
915 'rbx': ( 'uint', 'both', ),
916 'rsp': ( 'uint', 'both', ),
917 'rbp': ( 'uint', 'both', ),
918 'rsi': ( 'uint', 'both', ),
919 'rdi': ( 'uint', 'both', ),
920 'r8': ( 'uint', 'both', ),
921 'r9': ( 'uint', 'both', ),
922 'r10': ( 'uint', 'both', ),
923 'r11': ( 'uint', 'both', ),
924 'r12': ( 'uint', 'both', ),
925 'r13': ( 'uint', 'both', ),
926 'r14': ( 'uint', 'both', ),
927 'r15': ( 'uint', 'both', ),
928 # 16-bit, 32-bit or 64-bit registers according to operand size.
929 'oz.rax': ( 'uint', 'both', ),
930 'oz.rdx': ( 'uint', 'both', ),
931 'oz.rcx': ( 'uint', 'both', ),
932 'oz.rbx': ( 'uint', 'both', ),
933 'oz.rsp': ( 'uint', 'both', ),
934 'oz.rbp': ( 'uint', 'both', ),
935 'oz.rsi': ( 'uint', 'both', ),
936 'oz.rdi': ( 'uint', 'both', ),
937 'oz.r8': ( 'uint', 'both', ),
938 'oz.r9': ( 'uint', 'both', ),
939 'oz.r10': ( 'uint', 'both', ),
940 'oz.r11': ( 'uint', 'both', ),
941 'oz.r12': ( 'uint', 'both', ),
942 'oz.r13': ( 'uint', 'both', ),
943 'oz.r14': ( 'uint', 'both', ),
944 'oz.r15': ( 'uint', 'both', ),
945 # Control registers.
946 'cr0': ( 'cr0', 'both', ),
947 'cr4': ( 'cr4', 'both', ),
948 'xcr0': ( 'xcr0', 'both', ),
949 # FPU Registers
950 'fcw': ( 'uint', 'both', ),
951 'fsw': ( 'uint', 'both', ),
952 'ftw': ( 'uint', 'both', ),
953 'fop': ( 'uint', 'both', ),
954 'fpuip': ( 'uint', 'both', ),
955 'fpucs': ( 'uint', 'both', ),
956 'fpudp': ( 'uint', 'both', ),
957 'fpuds': ( 'uint', 'both', ),
958 'mxcsr': ( 'uint', 'both', ),
959 'st0': ( 'uint', 'both', ),
960 'st1': ( 'uint', 'both', ),
961 'st2': ( 'uint', 'both', ),
962 'st3': ( 'uint', 'both', ),
963 'st4': ( 'uint', 'both', ),
964 'st5': ( 'uint', 'both', ),
965 'st6': ( 'uint', 'both', ),
966 'st7': ( 'uint', 'both', ),
967 # MMX registers.
968 'mm0': ( 'uint', 'both', ),
969 'mm1': ( 'uint', 'both', ),
970 'mm2': ( 'uint', 'both', ),
971 'mm3': ( 'uint', 'both', ),
972 'mm4': ( 'uint', 'both', ),
973 'mm5': ( 'uint', 'both', ),
974 'mm6': ( 'uint', 'both', ),
975 'mm7': ( 'uint', 'both', ),
976 # SSE registers.
977 'xmm0': ( 'uint', 'both', ),
978 'xmm1': ( 'uint', 'both', ),
979 'xmm2': ( 'uint', 'both', ),
980 'xmm3': ( 'uint', 'both', ),
981 'xmm4': ( 'uint', 'both', ),
982 'xmm5': ( 'uint', 'both', ),
983 'xmm6': ( 'uint', 'both', ),
984 'xmm7': ( 'uint', 'both', ),
985 'xmm8': ( 'uint', 'both', ),
986 'xmm9': ( 'uint', 'both', ),
987 'xmm10': ( 'uint', 'both', ),
988 'xmm11': ( 'uint', 'both', ),
989 'xmm12': ( 'uint', 'both', ),
990 'xmm13': ( 'uint', 'both', ),
991 'xmm14': ( 'uint', 'both', ),
992 'xmm15': ( 'uint', 'both', ),
993 'xmm0.lo': ( 'uint', 'both', ),
994 'xmm1.lo': ( 'uint', 'both', ),
995 'xmm2.lo': ( 'uint', 'both', ),
996 'xmm3.lo': ( 'uint', 'both', ),
997 'xmm4.lo': ( 'uint', 'both', ),
998 'xmm5.lo': ( 'uint', 'both', ),
999 'xmm6.lo': ( 'uint', 'both', ),
1000 'xmm7.lo': ( 'uint', 'both', ),
1001 'xmm8.lo': ( 'uint', 'both', ),
1002 'xmm9.lo': ( 'uint', 'both', ),
1003 'xmm10.lo': ( 'uint', 'both', ),
1004 'xmm11.lo': ( 'uint', 'both', ),
1005 'xmm12.lo': ( 'uint', 'both', ),
1006 'xmm13.lo': ( 'uint', 'both', ),
1007 'xmm14.lo': ( 'uint', 'both', ),
1008 'xmm15.lo': ( 'uint', 'both', ),
1009 'xmm0.hi': ( 'uint', 'both', ),
1010 'xmm1.hi': ( 'uint', 'both', ),
1011 'xmm2.hi': ( 'uint', 'both', ),
1012 'xmm3.hi': ( 'uint', 'both', ),
1013 'xmm4.hi': ( 'uint', 'both', ),
1014 'xmm5.hi': ( 'uint', 'both', ),
1015 'xmm6.hi': ( 'uint', 'both', ),
1016 'xmm7.hi': ( 'uint', 'both', ),
1017 'xmm8.hi': ( 'uint', 'both', ),
1018 'xmm9.hi': ( 'uint', 'both', ),
1019 'xmm10.hi': ( 'uint', 'both', ),
1020 'xmm11.hi': ( 'uint', 'both', ),
1021 'xmm12.hi': ( 'uint', 'both', ),
1022 'xmm13.hi': ( 'uint', 'both', ),
1023 'xmm14.hi': ( 'uint', 'both', ),
1024 'xmm15.hi': ( 'uint', 'both', ),
1025 'xmm0.lo.zx': ( 'uint', 'both', ),
1026 'xmm1.lo.zx': ( 'uint', 'both', ),
1027 'xmm2.lo.zx': ( 'uint', 'both', ),
1028 'xmm3.lo.zx': ( 'uint', 'both', ),
1029 'xmm4.lo.zx': ( 'uint', 'both', ),
1030 'xmm5.lo.zx': ( 'uint', 'both', ),
1031 'xmm6.lo.zx': ( 'uint', 'both', ),
1032 'xmm7.lo.zx': ( 'uint', 'both', ),
1033 'xmm8.lo.zx': ( 'uint', 'both', ),
1034 'xmm9.lo.zx': ( 'uint', 'both', ),
1035 'xmm10.lo.zx': ( 'uint', 'both', ),
1036 'xmm11.lo.zx': ( 'uint', 'both', ),
1037 'xmm12.lo.zx': ( 'uint', 'both', ),
1038 'xmm13.lo.zx': ( 'uint', 'both', ),
1039 'xmm14.lo.zx': ( 'uint', 'both', ),
1040 'xmm15.lo.zx': ( 'uint', 'both', ),
1041 'xmm0.dw0': ( 'uint', 'both', ),
1042 'xmm1.dw0': ( 'uint', 'both', ),
1043 'xmm2.dw0': ( 'uint', 'both', ),
1044 'xmm3.dw0': ( 'uint', 'both', ),
1045 'xmm4.dw0': ( 'uint', 'both', ),
1046 'xmm5.dw0': ( 'uint', 'both', ),
1047 'xmm6.dw0': ( 'uint', 'both', ),
1048 'xmm7.dw0': ( 'uint', 'both', ),
1049 'xmm8.dw0': ( 'uint', 'both', ),
1050 'xmm9.dw0': ( 'uint', 'both', ),
1051 'xmm10.dw0': ( 'uint', 'both', ),
1052 'xmm11.dw0': ( 'uint', 'both', ),
1053 'xmm12.dw0': ( 'uint', 'both', ),
1054 'xmm13.dw0': ( 'uint', 'both', ),
1055 'xmm14.dw0': ( 'uint', 'both', ),
1056 'xmm15_dw0': ( 'uint', 'both', ),
1057 # AVX registers.
1058 'ymm0': ( 'uint', 'both', ),
1059 'ymm1': ( 'uint', 'both', ),
1060 'ymm2': ( 'uint', 'both', ),
1061 'ymm3': ( 'uint', 'both', ),
1062 'ymm4': ( 'uint', 'both', ),
1063 'ymm5': ( 'uint', 'both', ),
1064 'ymm6': ( 'uint', 'both', ),
1065 'ymm7': ( 'uint', 'both', ),
1066 'ymm8': ( 'uint', 'both', ),
1067 'ymm9': ( 'uint', 'both', ),
1068 'ymm10': ( 'uint', 'both', ),
1069 'ymm11': ( 'uint', 'both', ),
1070 'ymm12': ( 'uint', 'both', ),
1071 'ymm13': ( 'uint', 'both', ),
1072 'ymm14': ( 'uint', 'both', ),
1073 'ymm15': ( 'uint', 'both', ),
1074
1075 # Special ones.
1076 'value.xcpt': ( 'uint', 'output', ),
1077 };
1078
def __init__(self, sField, sOp, sValue, sType):
    """
    Initializes one field modifier.

    sField must be one of the keys in kdFields and sOp one of the entries
    in kasOperators.  All four arguments must be strings.
    """
    # Validate against the class tables before storing anything.
    assert sField in self.kdFields;
    assert sOp in self.kasOperators;

    self.sField, self.sOp = sField, sOp;
    self.sValue, self.sType = sValue, sType;

    # Type sanity checks, performed in the same order as before.
    for oArg in (sField, sOp, sType, sValue):
        assert isinstance(oArg, str);
1090
1091
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares one variable (operand size, ring, code bits, CPU
    mode, paging, vendor) against one of the values valid for that variable.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };

    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.

        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue one of the values valid for sVariable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;

        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1180
1181
1182class InstructionTest(object):
1183 """
1184 Instruction test.
1185 """
1186
1187 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1188 self.oInstr = oInstr; # type: InstructionTest
1189 self.aoInputs = []; # type: list(TestInOut)
1190 self.aoOutputs = []; # type: list(TestInOut)
1191 self.aoSelectors = []; # type: list(TestSelector)
1192
1193 def toString(self, fRepr = False):
1194 """
1195 Converts it to string representation.
1196 """
1197 asWords = [];
1198 if self.aoSelectors:
1199 for oSelector in self.aoSelectors:
1200 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1201 asWords.append('/');
1202
1203 for oModifier in self.aoInputs:
1204 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1205
1206 asWords.append('->');
1207
1208 for oModifier in self.aoOutputs:
1209 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1210
1211 if fRepr:
1212 return '<' + ' '.join(asWords) + '>';
1213 return ' '.join(asWords);
1214
1215 def __str__(self):
1216 """ Provide string represenation. """
1217 return self.toString(False);
1218
1219 def __repr__(self):
1220 """ Provide unambigious string representation. """
1221 return self.toString(True);
1222
class Operand(object):
    """
    Instruction operand.

    Pairs the operand location (a g_kdOpLocations key) with the operand
    type (a g_kdOpTypes key).
    """

    ## First letters of the operand types which use ModR/M encoding.
    _kasModRmTypePrefixes = ('E', 'G', 'M');

    def __init__(self, sWhere, sType):
        # Both values must be known to the global tables.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        sFirstChar = self.sType[0];
        return sFirstChar in self._kasModRmTypePrefixes;
1237
1238
1239
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction: the core specification
    (mnemonic, operands, opcode, flags, ...), implementation details, where
    it was found in the source file, and some raw intermediate inputs.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' pairs separated by '; ', skipping fields whose
        value is None.  With fRepr set the result is wrapped in '<' and '>'.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Identity test against None (not '!='), then join the remaining pairs.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms: '0x??' (full hex byte), '/r', '11/r' and '!11/r'
        where r is a single digit that ends up in bits 5:3 of the result.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The /r form.  It is two characters long ('/' plus one digit); the
        # previous check for a length of four could never match this form.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form (mod=11b, i.e. 0xc0 set):
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=10b, i.e. 0x80 set):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        which is then looked up in g_kdX86EFlagsConstants.  None or an empty
        list yields 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1413
1414
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Keyed by map name; every map is created with the same name as its key.
## Note: grp1_80 now carries the '/r' selector like grp1_81/82/83 do.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1487
1488
1489
class ParserException(Exception):
    """ Parser exception carrying a single message string. """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1494
1495
1496class SimpleParser(object):
1497 """
1498 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1499 """
1500
1501 ## @name Parser state.
1502 ## @{
1503 kiCode = 0;
1504 kiCommentMulti = 1;
1505 ## @}
1506
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state.

    sSrcFile is stored and used as the prefix of error messages, asLines
    is stored as the input to parse, and sDefaultMap must be a key of
    g_dInstructionMaps (the matching map is kept in oDefaultMap).
    """
    # Input and parsing position.
    self.sSrcFile     = sSrcFile;
    self.asLines      = asLines;
    self.iLine        = 0;
    self.iState       = self.kiCode;
    self.sComment     = '';
    self.iCommentLine = 0;
    self.aoCurInstrs  = [];

    # The map used for instructions which do not name any map themselves.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap  = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr  = 0;
    self.cTotalStubs  = 0;
    self.cTotalTagged = 0;

    # Validation expressions; the first three share one identifier pattern.
    sIdentifierPattern   = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName    = re.compile(sIdentifierPattern);
    self.oReMnemonic     = re.compile(sIdentifierPattern);
    self.oReStatsName    = re.compile(sIdentifierPattern);
    self.oReFunctionName = re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName    = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum      = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug          = True;

    # Tag name -> handler method.
    self.dTagHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both the '@optestN' and the '@optest[N]' spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Errors collected by error() and errorComment().
    self.asErrors = [];
1573
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is prefixed with the source
    file name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1579
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1585
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1593
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment in asErrors;
    the reported line number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1601
def printErrors(self):
    """
    Writes all collected errors to stderr in one go.
    Returns the number of errors (0 if there are none).
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1610
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1617
1618
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and registers it both globally and as a
    current instruction.  The current line is used when iLine is None.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oNewInstr);
    return oNewInstr;
1627
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string, i.e. 'mnemonic_optype1_optype2'.

    Nothing is done when the instruction already has a mnemonic.  Operands
    are only derived when the instruction has none yet.  Returns False if
    an operand type is not in g_kdOpTypes (operands derived up to that
    point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Only the mnemonic is set if there are no operand words or the
    # instruction already has operands.
    if len(asParts) < 2 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Unknown operand type; deliberately not reported as an error.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1643
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Fills in defaults for mnemonic, disassembler enum, stats name,
    function name and encoding, adds the instruction to its maps (the
    default map if it names none) and to the stats and function indexes.
    A duplicate stats name is reported via self.error().  Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Neither specified (tagged) instructions nor the unspecified legacy
    # ones need any special treatment at this point, so go straight to the
    # common defaults.

    def operandSuffix():
        """ '_type' for each operand having a type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # Guess mnemonic and operands from the stats name, or failing that the
    # function name, if the mnemonic is missing.
    if oInstr.sMnemonic is None:
        sGuessSource = oInstr.sStats;
        if sGuessSource is None and oInstr.sFunction is not None:
            sGuessSource = oInstr.sFunction.replace('iemOp_', '');
        if sGuessSource is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessSource);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name, preferring the function name
    # over mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandSuffix();

    # Derive the IEM function name, preferring mnemonic and operand types
    # over the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive encoding from operands.
    if oInstr.sEncoding is None:
        aoOperands = oInstr.aoOperands;
        if not aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'ModR/M';
            else:
                oInstr.sEncoding = 'fixed';
        elif aoOperands[0].usesModRM():
            fVexOperand = len(aoOperands) >= 2 and aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVexOperand else 'ModR/M';

    # Apply the default map and then add the instruction to all its maps.
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    # Check the opstat value and add it to the opstat indexed dictionary.
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    # Add to the function indexed dictionary.  Multiple instructions per
    # function are allowed.
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
1746
def doneInstructions(self, iLineInComment = None):
    """
    Done with the current instruction(s).

    Completes each current instruction at self.iLine or, when
    iLineInComment is given, at self.iCommentLine + iLineInComment.
    Updates the stub and instruction totals and resets the comment and
    the current instruction list.  Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the per-instruction state.
    self.sComment, self.aoCurInstrs = '', [];
    return True;
1761
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes currently set to
    None are replaced; any other value, empty strings included, is kept.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1773
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, padding the array with None as needed.

    Unless fOverwrite is exactly True, only entries currently set to None
    are replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);

        # Grow the array so that iEntry becomes a valid index.
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);

        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1785
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, the words following 'Opcode' are
    stored space separated in the sRawOldOpcodes attribute of the current
    instructions (only where not already set), with any '0X' prefix
    normalized to '0x'.

    Always returns False.
    """
    # An empty comment cannot be an opcode comment.
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and (   asWords[1].startswith('0x')
           or asWords[1].startswith('0X')):
        # Drop the leading 'Opcode' word and keep the opcode words.
        asOpcodeWords = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:]; # Replace the prefix, keep the digits.
            asOpcodeWords.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
1801
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction to attach the op-tag being parsed to.

    If no instruction is current, one is added for the tag's line (comment
    start line + iTagLine).  The op-tag count of every current instruction
    is incremented, and the total count of tagged instructions is bumped
    for each instruction receiving its first tag.

    Returns the most recently added current instruction.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCur in self.aoCurInstrs:
        fFirstTag = oCur.cOpTags == 0;
        oCur.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1811
1812 @staticmethod
1813 def flattenSections(aasSections):
1814 """
1815 Flattens multiline sections into stripped single strings.
1816 Returns list of strings, on section per string.
1817 """
1818 asRet = [];
1819 for asLines in aasSections:
1820 if asLines:
1821 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1822 return asRet;
1823
1824 @staticmethod
1825 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1826 """
1827 Flattens sections into a simple stripped string with newlines as
1828 section breaks. The final section does not sport a trailing newline.
1829 """
1830 # Typical: One section with a single line.
1831 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1832 return aasSections[0][0].strip();
1833
1834 sRet = '';
1835 for iSection, asLines in enumerate(aasSections):
1836 if asLines:
1837 if iSection > 0:
1838 sRet += sSectionSep;
1839 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1840 return sRet;
1841
1842
1843
1844 ## @name Tag parsers
1845 ## @{
1846
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opbrief
    Value:      Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, given a trailing period if it lacks one, and
    checked to be a single sentence of at most 180 characters before it is
    stored as the instruction's sBrief.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first period that ends a sentence, i.e. one followed by a
    # space or sitting at the very end.  Periods inside words are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1876
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdesc
    Value:      Text description, multiple sections, appended.

    Describes the instruction.  The flattened sections are appended to the
    description sections already collected for the instruction.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1891
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmnemonic
    Value:      mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check that it looks like a mnemonic.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Only the first mnemonic specification is accepted.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
1914
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       @op1, @op2, @op3, @op4
    Value:      [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location associated with the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;      # The operand number is the last tag character.
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # Default location for this operand type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1964
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Whitespace around the individual map names is ignored, as are empty
    entries (for instance from a line ending with a comma).

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries must be weeded out
    # before checking whether any value was given at all.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag's value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1999
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oppfx
    Value:      n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive.  A two character value is taken to be a
    hex byte lacking the '0x' prefix.  'n/a' is stored as None.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2037
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcode
    Value:      0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Besides a plain opcode byte, the forms '/<reg>', '11/<reg>' and
    '!11/<reg>' are accepted, where <reg> is a single digit in the range
    0 thru 7 (the ModR/M reg field is three bits wide).

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2067
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcodesub
    Value:      none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first item of the
    associated entry is what gets stored on the instruction.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to replace an earlier specification.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
2093
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  The value is accepted when it
    is a key of g_kdEncodings, a key of g_dInstructionMaps, or passes
    _isValidOpcodeByte.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown =    sNewEncoding in g_kdEncodings \
             or sNewEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an earlier specification.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));

    oInstr.sEncoding = sNewEncoding;
    return True;
2120
## Translates an EFLAGS related tag into the name of the Instruction
#  attribute holding the corresponding flag list (see parseTagOpEFlags).
kdOpFlagToAttr = {
    '@opfltest':   'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef':  'asFlUndefined',
    '@opflset':    'asFlSet',
    '@opflclear':  'asFlClear',
};
2129
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (keys of
    g_kdEFlagsMnemonics), or the single word 'none' for an empty list.  The
    resulting list is stored in the instruction attribute which
    kdOpFlagToAttr associates with the tag.

    Returns True on success, otherwise False.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # All the invalid values are reported before giving up.
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to replace an earlier specification.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2161
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.  Each hint must be a
    key of g_kdHints; the single word 'none' gives an empty list.  Valid
    hints are added to the instruction's dHints dictionary.

    Returns True on success, False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (Splitting on whitespace leaves no blanks to strip from the items.)
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # Report all the invalid hints before giving up.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2195
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdisenum
    Value:      OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.  The value must match self.oReDisEnum.  If several
    words are given, an error is reported and the first word is used.

    Returns True on success, otherwise False.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if not asWords:
            return False;

    sNewDisEnum = asWords[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an earlier specification.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));

    oInstr.sDisEnum = sNewDisEnum;
    return True;
2224
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.  The value must be a
    key of g_kdCpuNames.  If more than one word is given, an error is
    reported and the first word is used.  A value differing from one set
    earlier is reported as an error and the earlier value is kept.

    Returns True on success, otherwise the errorComment result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  This must come after the validation only, or the conflict
    # check below would always be comparing the new value with itself.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2254
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.
    The value is a comma or space separated list of g_kdCpuIdFlags keys, or
    the single word 'none' for an empty list.  Valid flags are appended to
    the instruction's asCpuIds list.

    Returns True on success, False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (Splitting on whitespace leaves no blanks to strip from the items.)
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Report all the invalid values before giving up.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2288
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is required.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate the one we want.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sNewGroup,) = asGroups;
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace an earlier specification.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
2314
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be exactly one key of g_kdInvalidStyles.  Only one of
    the three tags may be given per instruction.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate the one we want.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an earlier specification.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2352
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  Tests that parse cleanly are
    appended to the instruction's aoTests list, the others are reported via
    errorComment.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();

        # Work backwards: outputs come last, then inputs, then selectors.
        # The current list must be compared by identity, since comparing by
        # value would mistake two different but equal lists (both empty,
        # say) for one another and let misplaced switchers slip through.
        for sWord in reversed(asWords):
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's own type applies.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed okay, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2497
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is checked against the count of tests the instruction
    already has, a mismatch being reported as an error.  The value is then
    handed on to parseTagOpTest, whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows '@optest', optionally wrapped in square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2513
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed and nothing is recorded.

    See also @oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2525
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid ones are appended to the instruction's asCopyTests list, while
    invalid and duplicate ones are reported and skipped.

    Returns True, or the errorComment result if no reference is given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2554
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is in
    there already.

    See also @optestignore.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list, avoiding duplicates.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2577
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).  The value
    must be exactly one key of g_kdXcptTypes, and it can only be set once
    per instruction.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate the one we want.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sNewType,) = asTypes;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an earlier specification.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));

    oInstr.sXcptType = sNewType;
    return True;
2604
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The name must match self.oReFunctionName and can only be set once.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewFunction) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewFunction, self.oReFunctionName.pattern));

    # Refuse to replace an earlier specification.
    sOldFunction = oInstr.sFunction;
    if sOldFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sOldFunction, sNewFunction,));

    oInstr.sFunction = sNewFunction;
    return True;
2632
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The name must match self.oReStatsName and can only be set once.

    Returns True on success, otherwise the errorComment result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewStats) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Refuse to replace an earlier specification.
    sOldStats = oInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sOldStats, sNewStats,));

    oInstr.sStats = sNewStats;
    return True;
2660
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions specified so far.

    Returns the doneInstructions() result, or the errorComment() result if
    the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();

    # The tag takes no value, so anything following it is a mistake.
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2673
2674 ## @}
2675
2676
def parseComment(self):
    """
    Parses the comment currently held in self.sComment.

    Old style 'Opcode ...' comments are handed to parseCommentOldOpcode().
    After that each @tag and its text sections are dispatched to the
    matching handler in self.dTagHandlers.  Unknown @op* tags, likely
    misspellings and untagged text mixed with @op* tags are reported thru
    errorComment().

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    of interest), otherwise True.
    """
    #
    # Reject comments that cannot contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split into lines without the leading spaces and asterisks, then drop
    # the empty lines at both ends.  The comment start line is advanced by
    # the number of leading lines dropped.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    cLeadingEmpty = 0;
    while cLeadingEmpty < len(asLines) and not asLines[cLeadingEmpty]:
        cLeadingEmpty += 1;
    del asLines[:cLeadingEmpty];
    self.iCommentLine += cLeadingEmpty;

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Old style comment: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Tag bookkeeping shared between the loop and the final flush.
    #
    dState = { 'cOpTags': 0, 'sFlatDefault': None, };

    def flushTag(sTag, aasTagSections, iTagLine, iEndLine, fSuggest):
        """ Completes one tag; fSuggest enables the 'Did you mean' hints. """
        # A trailing empty section only exists because of a blank line.
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            dState['cOpTags'] += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            dState['sFlatDefault'] = self.flattenAllSections(aasTagSections);
        elif not fSuggest:
            pass;
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));

    #
    # Walk the lines collecting text sections for the current tag.  Text
    # preceding the first tag is collected under the pseudo tag '@default'.
    #
    sCurTag     = '@default';
    iCurTagLine = 0;
    aasSections = [ [], ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            # A new tag starts here, so the previous one is complete.
            flushTag(sCurTag, aasSections, iCurTagLine, iLine, True);

            asSplit     = sLine.split(None, 1);
            sCurTag     = asSplit[0].lower();
            aasSections = [ asSplit[1:], ];     # Holds the rest of the tag line, if any.
            iCurTagLine = iLine;
        elif sLine:
            aasSections[-1].append(sLine);
        elif aasSections[-1]:
            # A blank line after some text opens a new section.
            aasSections.append([]);

    #
    # The last tag has no successor to trigger its processing.  No
    # misspelling hints are issued for it.
    #
    flushTag(sCurTag, aasSections, iCurTagLine, len(asLines) - 1, False);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if dState['cOpTags'] > 0 and dState['sFlatDefault']:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (dState['sFlatDefault'],));

    return True;
2777
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the argument list is not
    closed within it, following lines from self.asLines (starting at
    self.iLine) are appended until it is.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    If the macro name is invalid or the end of the file is reached before
    the closing parenthesis, the problem is recorded via self.error() and
    (len(sInvocation), None) is returned, so that callers unpacking the
    tuple keep working and see the invocation as 'not found'.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): raiseError() is presumed to raise and not return.
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.  Only separators at nesting depth one split arguments, so
    # parenthesized expressions inside an argument are kept intact.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in continuation lines; loops so an empty line cannot leave
        # 'off' pointing beyond the end of the string.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (len(sInvocation), None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch in (',', ')'):
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2819
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if an opening parenthesis follows the name (possibly
    after whitespace).

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than indexing: nothing at all may follow the name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oResult = self.parseMacroInvocation(sCode[offHit:]);
        # The parser may hand back an error status instead of a tuple when
        # the invocation is malformed; treat that like 'not found'.
        if isinstance(oResult, tuple):
            offAfter, asRet = oResult;
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
2829
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2835
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazy generator, so no further macros are tried after the first hit.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oAttempts if asArgs is not None), None);
2845
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked against each other and against what
    the @op* tags already specified for the last current instruction, and
    whatever has not been specified yet (mnemonic, operands, encoding,
    statistics name, hints) is filled in from the macro.

    sMacro is the macro name (for messages), sStats/sForm/sUpper/sLower/
    sDisHints/sIemHints are the corresponding macro arguments, asOperands
    the operand type arguments.  sAsm is currently unused.

    Returns True.  All problems are reported thru self.error().
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        sFormEncoding = g_kdIemForms[sForm][0];
        asFormWheres  = g_kdIemForms[sForm][1];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = sFormEncoding;
        elif sFormEncoding != oInstr.sEncoding:
            # Report the encoding of the form, i.e. the value that was compared.
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, sFormEncoding, sForm));

        # Check the parameter locations for the encoding.
        if asFormWheres is not None:
            if len(asFormWheres) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint parameters are '|' separated lists of prefixed constants or
    # a plain '0', so they share the same processing.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
2963
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros lack the a_Stats and a_szMnemonic arguments, so both are
    derived from the lower case mnemonic and the operands before handing
    everything on to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands, the bare mnemonic serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2973
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if a FNIEMOP_XXX decoder function definition macro was
    found and processed, False otherwise (also when IEMOP_MNEMONIC* macros
    were processed).
    """
    # Without an opening parenthesis following something there cannot be
    # any macro invocation in the code.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );

        fStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # A stub has no body following it, so the instructions are complete.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,]
    #                     a_fDisHints, a_fIemHints)
    # where N is 0 thru 4.  The operands sit between a_Lower and the hints.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    # where N is 0 thru 4.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    return False;
3067
3068
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, switching between the code state
    and the multi-line comment state.  Completed comments are handed to
    parseComment() and code fragments to checkCodeForMacro().

    Returns the printErrors() result (number of errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch state.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # Comment complete, parse it and go back to code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        # Raise explicitly: an assert is compiled out with -O,
                        # which would leave this loop spinning forever.
                        raise AssertionError('invalid parser state: %s' % (self.iState,));
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() so an empty line is simply no match.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3134
3135
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the SimpleParser.parse() result for the file.
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser are passed on to the caller, with ParserException being
    printed first.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # Closes the file whether or not the read succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Any other exception simply propagates.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3166
3167
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry is looked up first as a statistics name and then
    as a function name, and the tests of the instruction(s) found are
    appended to those of the referencing instruction.

    Returns the number of errors, which are written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name identifies one instruction, whereas a
            # function name may be shared by several.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3195
3196
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (there is nothing that can fail here).
    """
    if g_aoOnlyTestInstructions:
        for oInstr in g_aoAllInstructions:
            # Instructions without tests have nothing to drop.
            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
                oInstr.aoTests = [];
    return 0;
3208
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files found next to this script,
    then performs the test copying and applies the only-test filtering.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted.
    Raises exception on failure.
    """
    # (default map name, file name) pairs, parsed in this order.
    aasSources = [
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    ];

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    for sDefaultMap, sName in aasSources:
        sPath    = os.path.join(sSrcDir, sName);
        cErrors += __parseFileByName(sPath, sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
3235
3236
3237
# Parse all the instruction files when the module is loaded, so that the
# instruction data is in place for the generators below.
__parseAll()
3239
3240
3241#
3242# Generators (may perhaps move later).
3243#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Writes a DISOPCODE table to oDstFile for the first instruction map in
    the sort order (encoding followed by lead opcode bytes).  Only that one
    map is generated for now.
    """
    # Start offsets of the columns in the generated table lines.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

    aoSortedMaps = sorted(g_dInstructionMaps.items(),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes));
    for sName, oMap in aoSortedMaps:
        assert oMap.sName == sName;

        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):
            # Row marker comment ahead of every 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                continue; # Nothing is emitted for empty slots.
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            aoOperands = oInstr.aoOperands;
            if len(aoOperands) > 3:
                sMacro       = 'OPVEX'
                cMaxOperands = 4;
            else:
                sMacro       = 'OP';
                cMaxOperands = 3;
            assert len(aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            asFmtOperands = [];
            for oOperand in aoOperands:
                sFmt = g_kdOpTypes[oOperand.sType][2];
                if sFmt[0] != '%':      ## @todo remove upper() later.
                    sFmt = sFmt.upper();
                asFmtOperands.append(sFmt);
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asFmtOperands:
                sFormat += ' ' + ','.join(asFmtOperands);
            asColumns = [ sFormat + '",', ];

            #
            # Decoders.
            #
            iStart    = len(asColumns);
            sEncoding = oInstr.sEncoding;
            if sEncoding is None:
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(aoOperands) > 1 and aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif sEncoding in [ 'prefix', ]:
                asColumns.extend(['0,'] * len(aoOperands));
            elif sEncoding in [ 'fixed' ]:
                pass;
            elif sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo
                # IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3,
                # IDX_ParseGrp4, IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6,
                # IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
                # IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                # IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5,
                # IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause,
                # IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands.
            for oOperand in aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            iStart = len(asColumns)
            asColumns.extend(['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands]);
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Flags.  Only the hints mapping to DISOPTYPE_XXX defines apply.
            #
            asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())
                         if g_kdHints[sHint].startswith('DISOPTYPE_')];
            asColumns.append((' | '.join(asDefines) or '0') + '),');

            #
            # Format the columns into a line.  Columns are padded up to their
            # offsets, or separated by a single space when running over.
            #
            # Example:
            #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #      DISOPTYPE_HARMLESS),
            #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                offColumn = aoffColumns[iColumn];
                if len(sLine) < offColumn:
                    sLine = sLine.ljust(offColumn);
                else:
                    sLine += ' ';
                sLine += sColumn;
            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3407
# When run as a script, generate the disassembler tables using the default
# destination of generateDisassemblerTables.
if __name__ == '__main__':
    generateDisassemblerTables()
3410
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette