VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66810

Last change on this file since 66810 was 66810, checked in by vboxsync, 8 years ago

IEM: Implemented movq2dq Vdq,Nq (f3 0f d6)

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 141.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66810 2017-05-05 14:36:10Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66810 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' builtin, so alias it to int; code further
# down in this file (e.g. TestType.get) converts strings by calling long().
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their numeric values.
## Single-bit flags are spelled as (1 << bit number).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),    # Two-bit field.
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,               # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant.  A leading
## '!' on the constant name marks a mnemonic that stands for the flag being
## clear rather than set (TestTypeEflags.get checks for it).
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Each entry is a single bit, spelled as (1 << bit number).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
139
## Constants and values for CR4.
## Each entry is a single bit, spelled as (1 << bit number).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
161
## XSAVE components (XCR0).
## Single components are spelled as (1 << bit number); the ALL_XXX entries
## are ORed together from the component bits they cover.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     (1 << 2) | (1 << 6) | (1 << 7),             # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),  # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
##
## The keys are the location names referenced by field 1 of the g_kdOpTypes
## entries and by the operand lists in g_kdIemForms.  Every value is an empty
## list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Several keys carry a suffix (RO, WO, RW, Hi, ZxReg) and share the format
## string and/or OP_PARAM value of the unsuffixed type.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':       ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':       ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':       ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':       ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':      ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':      ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':      ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':      ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':      ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':       ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg':  ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':       ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':     ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Nq':       ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':       ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'MbRO':     ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'MdRO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'MdWO':     ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':       ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MqWO':     ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'MRO':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'MRW':      ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':       ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':       ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':       ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Vss':      ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZxReg': ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':      ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZxReg': ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':      ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':      ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':       ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':     ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZxReg':  ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),

    # Immediate values.
    'Ib':       ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instructions.
    'Iw':       ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':       ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':       ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':       ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':       ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':       ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':       ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':       ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':       ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':       ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':       ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':       ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':       ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':       ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':       ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':      ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':       ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':       ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':       ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':       ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':       ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':       ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
291
292# IDX_ParseFixedReg
293# IDX_ParseVexDest
294
295
## IEMFORM_XXX mappings.
##
## Each value is a pair: the encoding name (a key of g_kdEncodings) and the
## list of operand locations in operand order (keys of g_kdOpLocations).
## 'FIXED' has None instead of a location list.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
324
## \@oppfx values.
## The keys are the accepted values; every value is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
332
## Special \@opcode tag values.
## The keys are the accepted non-hex values; every value is an empty list.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
342
## Special \@opcodesub tag values.
## Each value is a one-element list holding the canonical spelling, so the
## short aliases '11' and '!11' map to the same entry as the long forms.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
351
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX name, or None
## for 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
359
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted style names; every value is an empty list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
368
## Known CPU names.  The keys are the names; every value is an empty tuple.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
376
## \@opcpuid
##
## Maps the feature name used in the tag to the name of the matching
## X86_CPUID_XXX feature bit constant.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the constant of a
##       neighbouring CPUID.1:ECX bit (DTES64, CPLDS and TM2 respectively);
##       they now name their own feature bits.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
421
## \@ophints values.
##
## Maps the hint name to the name of a DISOPTYPE_XXX constant.  The last two
## hints map to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
456
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted type names; every value is an empty list.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
490
491
492def _isValidOpcodeByte(sOpcode):
493 """
494 Checks if sOpcode is a valid lower case opcode byte.
495 Returns true/false.
496 """
497 if len(sOpcode) == 4:
498 if sOpcode[:2] == '0x':
499 if sOpcode[2] in '0123456789abcdef':
500 if sOpcode[3] in '0123456789abcdef':
501 return True;
502 return False;
503
504
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values for the constructor.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with mmmmm (opcode map select) = 1
        'vex2':     [], ##< VEX or EVEX prefix with mmmmm (opcode map select) = 2
        'vex3':     [], ##< VEX or EVEX prefix with mmmmm (opcode map select) = 3
        'xop8':     [], ##< XOP prefix with mmmmm (opcode map select) = 8
        'xop9':     [], ##< XOP prefix with mmmmm (opcode map select) = 9
        'xop10':    [], ##< XOP prefix with mmmmm (opcode map select) = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name (getDisasTableName derives the table name from
        it), asLeadOpcodes the list of lead opcode bytes as lower case hex
        strings, sSelector a key of kdSelectors, sEncoding a key of
        kdEncodings and sDisParse an optional IDX_ParseXXX name.  Invalid
        arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: Instruction
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the three selectors that are specific to
        modrm.mod (the two top bits of the byte) the mod value is asserted.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! (bOpcode & 0xc0) can only be 0x00, 0x40, 0x80 or 0xc0, so the
        #       mod == 0y11 checks below must compare against 0xc0.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: turn the entry into a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        sName: 'm', 'r' and two digit hex words keep their underscore, while
        any other word is appended with its first letter upper cased (and
        'grp'/'map' turned into 'Grp'/'Map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing (rather than indexing) copes with empty words, e.g.
                # from a doubled or trailing underscore.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;
645
646
class TestType(object):
    """
    Test value type for integer like values.

    The fUnsigned constructor parameter sets the default choice between zero
    and sign extending a value.  An explicit '+' or '-' in front of a value
    always selects sign extension.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Raised when a value string cannot be converted. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: each lower case hex digit mapped to its 4-bit complement.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue to the shortest normal sized (see acbSizes) byte
        representation, least significant byte first.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class only ever returns the first form.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and moves the spot where a
        # '0x' prefix may be found by one character.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offDigits   = 1 if fHasSign else 0;
        fHex        = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits.  A negative value n is formatted as -n - 1 and
        # then inverted digit by digit, yielding its two's complement digits.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[chDigit] for chDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;      # Make sure the top bit is set.

        # Work out the natural number of digits: the first size that fits, or
        # for oversized values a whole multiple of the largest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad on the left with zeros or, for negative values, with 'f's.
        sHex = sHex.rjust(cNaturalDigits, 'f' if fNegative else '0');

        # Convert the digit pairs to bytes and flip them to little endian order.
        abValue = bytearray(int(sHex[off : off + 2], 16) for off in range(0, len(sHex), 2));
        abValue.reverse();

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integers.
        """
        _ = sValue;
        return False;
771
772
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    A value is a comma separated list of flag mnemonics from
    g_kdEFlagsMnemonics.  Mnemonics mapping to a '!' prefixed constant ask
    for the flag to be cleared, the others for it to be set.
    """

    ## The mnemonics that stand for a cleared flag.  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list in sValue to byte representation.

        Returns a ((fSignExtend, bytearray),) tuple holding the flags to set
        when no flag is to be cleared.  Otherwise returns a pair of
        (fSignExtend, bytearray) entries: first the mask of the flags to
        clear, then the flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains at least one flag-clearing mnemonic, in
        which case get() returns a pair.

        The mnemonics are compared as whole comma separated tokens, the same
        way get() splits the value.  A substring search would for instance
        find 'pl' inside 'iopl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
808
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose flags are looked up in a
    dictionary of constants, such as CR0.

    A value is a comma separated list of flag names; each name is upper
    cased and prefixed with sConstantPrefix to form the dictionary key.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs together the values of all the flags listed in sValue and returns
        the result in the TestType.get format.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fCurrent  = self.kdConstantsAndValues.get(sConstant, None);
            if fCurrent is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined = fCombined | fCurrent;
        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
829
830
831class TestInOut(object):
832 """
833 One input or output state modifier.
834
835 This should be thought as values to modify BS3REGCTX and extended (needs
836 to be structured) state.
837 """
838 ## Assigned operators.
839 kasOperators = [
840 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
841 '&~=',
842 '&=',
843 '|=',
844 '='
845 ];
846 ## Types
847 kdTypes = {
848 'uint': TestType('uint', fUnsigned = True),
849 'int': TestType('int'),
850 'efl': TestTypeEflags('efl'),
851 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
852 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
853 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
854 };
855 ## CPU context fields.
856 kdFields = {
857 # name: ( default type, [both|input|output], )
858 # Operands.
859 'op1': ( 'uint', 'both', ), ## \@op1
860 'op2': ( 'uint', 'both', ), ## \@op2
861 'op3': ( 'uint', 'both', ), ## \@op3
862 'op4': ( 'uint', 'both', ), ## \@op4
863 # Flags.
864 'efl': ( 'efl', 'both', ),
865 'efl_undef': ( 'uint', 'output', ),
866 # 8-bit GPRs.
867 'al': ( 'uint', 'both', ),
868 'cl': ( 'uint', 'both', ),
869 'dl': ( 'uint', 'both', ),
870 'bl': ( 'uint', 'both', ),
871 'ah': ( 'uint', 'both', ),
872 'ch': ( 'uint', 'both', ),
873 'dh': ( 'uint', 'both', ),
874 'bh': ( 'uint', 'both', ),
875 'r8l': ( 'uint', 'both', ),
876 'r9l': ( 'uint', 'both', ),
877 'r10l': ( 'uint', 'both', ),
878 'r11l': ( 'uint', 'both', ),
879 'r12l': ( 'uint', 'both', ),
880 'r13l': ( 'uint', 'both', ),
881 'r14l': ( 'uint', 'both', ),
882 'r15l': ( 'uint', 'both', ),
883 # 16-bit GPRs.
884 'ax': ( 'uint', 'both', ),
885 'dx': ( 'uint', 'both', ),
886 'cx': ( 'uint', 'both', ),
887 'bx': ( 'uint', 'both', ),
888 'sp': ( 'uint', 'both', ),
889 'bp': ( 'uint', 'both', ),
890 'si': ( 'uint', 'both', ),
891 'di': ( 'uint', 'both', ),
892 'r8w': ( 'uint', 'both', ),
893 'r9w': ( 'uint', 'both', ),
894 'r10w': ( 'uint', 'both', ),
895 'r11w': ( 'uint', 'both', ),
896 'r12w': ( 'uint', 'both', ),
897 'r13w': ( 'uint', 'both', ),
898 'r14w': ( 'uint', 'both', ),
899 'r15w': ( 'uint', 'both', ),
900 # 32-bit GPRs.
901 'eax': ( 'uint', 'both', ),
902 'edx': ( 'uint', 'both', ),
903 'ecx': ( 'uint', 'both', ),
904 'ebx': ( 'uint', 'both', ),
905 'esp': ( 'uint', 'both', ),
906 'ebp': ( 'uint', 'both', ),
907 'esi': ( 'uint', 'both', ),
908 'edi': ( 'uint', 'both', ),
909 'r8d': ( 'uint', 'both', ),
910 'r9d': ( 'uint', 'both', ),
911 'r10d': ( 'uint', 'both', ),
912 'r11d': ( 'uint', 'both', ),
913 'r12d': ( 'uint', 'both', ),
914 'r13d': ( 'uint', 'both', ),
915 'r14d': ( 'uint', 'both', ),
916 'r15d': ( 'uint', 'both', ),
917 # 64-bit GPRs.
918 'rax': ( 'uint', 'both', ),
919 'rdx': ( 'uint', 'both', ),
920 'rcx': ( 'uint', 'both', ),
921 'rbx': ( 'uint', 'both', ),
922 'rsp': ( 'uint', 'both', ),
923 'rbp': ( 'uint', 'both', ),
924 'rsi': ( 'uint', 'both', ),
925 'rdi': ( 'uint', 'both', ),
926 'r8': ( 'uint', 'both', ),
927 'r9': ( 'uint', 'both', ),
928 'r10': ( 'uint', 'both', ),
929 'r11': ( 'uint', 'both', ),
930 'r12': ( 'uint', 'both', ),
931 'r13': ( 'uint', 'both', ),
932 'r14': ( 'uint', 'both', ),
933 'r15': ( 'uint', 'both', ),
934 # 16-bit, 32-bit or 64-bit registers according to operand size.
935 'oz.rax': ( 'uint', 'both', ),
936 'oz.rdx': ( 'uint', 'both', ),
937 'oz.rcx': ( 'uint', 'both', ),
938 'oz.rbx': ( 'uint', 'both', ),
939 'oz.rsp': ( 'uint', 'both', ),
940 'oz.rbp': ( 'uint', 'both', ),
941 'oz.rsi': ( 'uint', 'both', ),
942 'oz.rdi': ( 'uint', 'both', ),
943 'oz.r8': ( 'uint', 'both', ),
944 'oz.r9': ( 'uint', 'both', ),
945 'oz.r10': ( 'uint', 'both', ),
946 'oz.r11': ( 'uint', 'both', ),
947 'oz.r12': ( 'uint', 'both', ),
948 'oz.r13': ( 'uint', 'both', ),
949 'oz.r14': ( 'uint', 'both', ),
950 'oz.r15': ( 'uint', 'both', ),
951 # Control registers.
952 'cr0': ( 'cr0', 'both', ),
953 'cr4': ( 'cr4', 'both', ),
954 'xcr0': ( 'xcr0', 'both', ),
955 # FPU Registers
956 'fcw': ( 'uint', 'both', ),
957 'fsw': ( 'uint', 'both', ),
958 'ftw': ( 'uint', 'both', ),
959 'fop': ( 'uint', 'both', ),
960 'fpuip': ( 'uint', 'both', ),
961 'fpucs': ( 'uint', 'both', ),
962 'fpudp': ( 'uint', 'both', ),
963 'fpuds': ( 'uint', 'both', ),
964 'mxcsr': ( 'uint', 'both', ),
965 'st0': ( 'uint', 'both', ),
966 'st1': ( 'uint', 'both', ),
967 'st2': ( 'uint', 'both', ),
968 'st3': ( 'uint', 'both', ),
969 'st4': ( 'uint', 'both', ),
970 'st5': ( 'uint', 'both', ),
971 'st6': ( 'uint', 'both', ),
972 'st7': ( 'uint', 'both', ),
973 # MMX registers.
974 'mm0': ( 'uint', 'both', ),
975 'mm1': ( 'uint', 'both', ),
976 'mm2': ( 'uint', 'both', ),
977 'mm3': ( 'uint', 'both', ),
978 'mm4': ( 'uint', 'both', ),
979 'mm5': ( 'uint', 'both', ),
980 'mm6': ( 'uint', 'both', ),
981 'mm7': ( 'uint', 'both', ),
982 # SSE registers.
983 'xmm0': ( 'uint', 'both', ),
984 'xmm1': ( 'uint', 'both', ),
985 'xmm2': ( 'uint', 'both', ),
986 'xmm3': ( 'uint', 'both', ),
987 'xmm4': ( 'uint', 'both', ),
988 'xmm5': ( 'uint', 'both', ),
989 'xmm6': ( 'uint', 'both', ),
990 'xmm7': ( 'uint', 'both', ),
991 'xmm8': ( 'uint', 'both', ),
992 'xmm9': ( 'uint', 'both', ),
993 'xmm10': ( 'uint', 'both', ),
994 'xmm11': ( 'uint', 'both', ),
995 'xmm12': ( 'uint', 'both', ),
996 'xmm13': ( 'uint', 'both', ),
997 'xmm14': ( 'uint', 'both', ),
998 'xmm15': ( 'uint', 'both', ),
999 'xmm0.lo': ( 'uint', 'both', ),
1000 'xmm1.lo': ( 'uint', 'both', ),
1001 'xmm2.lo': ( 'uint', 'both', ),
1002 'xmm3.lo': ( 'uint', 'both', ),
1003 'xmm4.lo': ( 'uint', 'both', ),
1004 'xmm5.lo': ( 'uint', 'both', ),
1005 'xmm6.lo': ( 'uint', 'both', ),
1006 'xmm7.lo': ( 'uint', 'both', ),
1007 'xmm8.lo': ( 'uint', 'both', ),
1008 'xmm9.lo': ( 'uint', 'both', ),
1009 'xmm10.lo': ( 'uint', 'both', ),
1010 'xmm11.lo': ( 'uint', 'both', ),
1011 'xmm12.lo': ( 'uint', 'both', ),
1012 'xmm13.lo': ( 'uint', 'both', ),
1013 'xmm14.lo': ( 'uint', 'both', ),
1014 'xmm15.lo': ( 'uint', 'both', ),
1015 'xmm0.hi': ( 'uint', 'both', ),
1016 'xmm1.hi': ( 'uint', 'both', ),
1017 'xmm2.hi': ( 'uint', 'both', ),
1018 'xmm3.hi': ( 'uint', 'both', ),
1019 'xmm4.hi': ( 'uint', 'both', ),
1020 'xmm5.hi': ( 'uint', 'both', ),
1021 'xmm6.hi': ( 'uint', 'both', ),
1022 'xmm7.hi': ( 'uint', 'both', ),
1023 'xmm8.hi': ( 'uint', 'both', ),
1024 'xmm9.hi': ( 'uint', 'both', ),
1025 'xmm10.hi': ( 'uint', 'both', ),
1026 'xmm11.hi': ( 'uint', 'both', ),
1027 'xmm12.hi': ( 'uint', 'both', ),
1028 'xmm13.hi': ( 'uint', 'both', ),
1029 'xmm14.hi': ( 'uint', 'both', ),
1030 'xmm15.hi': ( 'uint', 'both', ),
1031 'xmm0.lo.zx': ( 'uint', 'both', ),
1032 'xmm1.lo.zx': ( 'uint', 'both', ),
1033 'xmm2.lo.zx': ( 'uint', 'both', ),
1034 'xmm3.lo.zx': ( 'uint', 'both', ),
1035 'xmm4.lo.zx': ( 'uint', 'both', ),
1036 'xmm5.lo.zx': ( 'uint', 'both', ),
1037 'xmm6.lo.zx': ( 'uint', 'both', ),
1038 'xmm7.lo.zx': ( 'uint', 'both', ),
1039 'xmm8.lo.zx': ( 'uint', 'both', ),
1040 'xmm9.lo.zx': ( 'uint', 'both', ),
1041 'xmm10.lo.zx': ( 'uint', 'both', ),
1042 'xmm11.lo.zx': ( 'uint', 'both', ),
1043 'xmm12.lo.zx': ( 'uint', 'both', ),
1044 'xmm13.lo.zx': ( 'uint', 'both', ),
1045 'xmm14.lo.zx': ( 'uint', 'both', ),
1046 'xmm15.lo.zx': ( 'uint', 'both', ),
1047 'xmm0.dw0': ( 'uint', 'both', ),
1048 'xmm1.dw0': ( 'uint', 'both', ),
1049 'xmm2.dw0': ( 'uint', 'both', ),
1050 'xmm3.dw0': ( 'uint', 'both', ),
1051 'xmm4.dw0': ( 'uint', 'both', ),
1052 'xmm5.dw0': ( 'uint', 'both', ),
1053 'xmm6.dw0': ( 'uint', 'both', ),
1054 'xmm7.dw0': ( 'uint', 'both', ),
1055 'xmm8.dw0': ( 'uint', 'both', ),
1056 'xmm9.dw0': ( 'uint', 'both', ),
1057 'xmm10.dw0': ( 'uint', 'both', ),
1058 'xmm11.dw0': ( 'uint', 'both', ),
1059 'xmm12.dw0': ( 'uint', 'both', ),
1060 'xmm13.dw0': ( 'uint', 'both', ),
1061 'xmm14.dw0': ( 'uint', 'both', ),
1062 'xmm15_dw0': ( 'uint', 'both', ),
1063 # AVX registers.
1064 'ymm0': ( 'uint', 'both', ),
1065 'ymm1': ( 'uint', 'both', ),
1066 'ymm2': ( 'uint', 'both', ),
1067 'ymm3': ( 'uint', 'both', ),
1068 'ymm4': ( 'uint', 'both', ),
1069 'ymm5': ( 'uint', 'both', ),
1070 'ymm6': ( 'uint', 'both', ),
1071 'ymm7': ( 'uint', 'both', ),
1072 'ymm8': ( 'uint', 'both', ),
1073 'ymm9': ( 'uint', 'both', ),
1074 'ymm10': ( 'uint', 'both', ),
1075 'ymm11': ( 'uint', 'both', ),
1076 'ymm12': ( 'uint', 'both', ),
1077 'ymm13': ( 'uint', 'both', ),
1078 'ymm14': ( 'uint', 'both', ),
1079 'ymm15': ( 'uint', 'both', ),
1080
1081 # Special ones.
1082 'value.xcpt': ( 'uint', 'output', ),
1083 };
1084
1085 def __init__(self, sField, sOp, sValue, sType):
1086 assert sField in self.kdFields;
1087 assert sOp in self.kasOperators;
1088 self.sField = sField;
1089 self.sOp = sOp;
1090 self.sValue = sValue;
1091 self.sType = sType;
1092 assert isinstance(sField, str);
1093 assert isinstance(sOp, str);
1094 assert isinstance(sType, str);
1095 assert isinstance(sValue, str);
1096
1097
class TestSelector(object):
    """
    A single selector of an instruction test.

    A selector is a ( variable, compare operator, value ) triplet such as
    size==o16 or vendor!=amd.
    """

    ## The compare operators a selector may use.
    kasCompareOps = [ '==', '!=' ];

    ## The selector variables.  Each variable maps its valid values to an
    ## identifier string (for instance 'o16' -> 'size_o16').
    kdVariables = {
        # Operand size.
        'size': {
            'o16':      'size_o16',
            'o32':      'size_o32',
            'o64':      'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':        'ring_0',
            '1':        'ring_1',
            '2':        'ring_2',
            '3':        'ring_3',
            '0..2':     'ring_0_thru_2',
            '1..3':     'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':       'code_64bit',
            '32':       'code_32bit',
            '16':       'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':     'mode_real',
            'prot':     'mode_prot',
            'long':     'mode_long',
            'v86':      'mode_v86',
            'smm':      'mode_smm',
            'vmx':      'mode_vmx',
            'svm':      'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':       'paging_on',
            'off':      'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':      'vendor_amd',
            'intel':    'vendor_intel',
            'via':      'vendor_via',
        },
    };

    ## Shorthand predicates, each given with the variable expression it
    ## stands for.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates the selector.

        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue a valid value for the variable (all checked by assertions).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1186
1187
1188class InstructionTest(object):
1189 """
1190 Instruction test.
1191 """
1192
1193 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1194 self.oInstr = oInstr; # type: InstructionTest
1195 self.aoInputs = []; # type: list(TestInOut)
1196 self.aoOutputs = []; # type: list(TestInOut)
1197 self.aoSelectors = []; # type: list(TestSelector)
1198
1199 def toString(self, fRepr = False):
1200 """
1201 Converts it to string representation.
1202 """
1203 asWords = [];
1204 if self.aoSelectors:
1205 for oSelector in self.aoSelectors:
1206 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1207 asWords.append('/');
1208
1209 for oModifier in self.aoInputs:
1210 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1211
1212 asWords.append('->');
1213
1214 for oModifier in self.aoOutputs:
1215 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1216
1217 if fRepr:
1218 return '<' + ' '.join(asWords) + '>';
1219 return ' '.join(asWords);
1220
1221 def __str__(self):
1222 """ Provide string represenation. """
1223 return self.toString(False);
1224
1225 def __repr__(self):
1226 """ Provide unambigious string representation. """
1227 return self.toString(True);
1228
class Operand(object):
    """
    An instruction operand, described by its location and its type.
    """

    def __init__(self, sWhere, sType):
        """
        sWhere must be a key of g_kdOpLocations and sType a key of
        g_kdOpTypes; the offending value is the assertion message otherwise.
        """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if the operand type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        sFirstChar = self.sType[0];
        return sFirstChar in ('E', 'G', 'M');
1243
1244
1245
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds what has been collected about one instruction: the core description
    (mnemonic, operands, opcode, flags, ...), implementation details, where it
    was found in the source, and some raw intermediate inputs.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction found in sSrcFile; iLine is recorded as
        the line where it was created.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs for the
        fields that are set (not None).  It is wrapped in angle brackets when
        fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:     aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:     aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:   aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:    aasFields.append(['unused', 'True']);
        if self.fInvalid:   aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:      aasFields.append(['fStub', 'True']);
        if self.fUdStub:    aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:    aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Fields that are None are left out altogether.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms of sOpcode:
            - '0xNN':   Full hex byte, returned as is.
            - '/r':     A slash and one digit, returns r << 3.
            - '11/r':   Returns (r << 3) | 0xc0.
            - '!11/r':  Returns (r << 3) | 0x80.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (two characters; the length check used to be 4, which
        # no '/r' string could ever satisfy):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        # NOTE(review): 0x80 sets bit 7 only, which would be mod=2 rather than
        # the mod=1 mentioned above - left unchanged, confirm what is intended.
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics and
        the values of those constants (g_kdX86EFlagsConstants) are OR'ed
        together.  None or an empty list gives 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1419
1420
## All the instructions.
## SimpleParser.addInstruction appends every Instruction it creates to this list.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Filled in by SimpleParser.doneInstructionOne; when a name is already taken,
## an error is recorded there and the instruction registered first is kept.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each value is a list, since several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1432
## Instruction maps.
## Indexed by map name; each InstructionMap is created with that same name.
## SimpleParser looks up its default map here.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 (0x80) is selected by the ModR/M reg field just like the other
    # group 1 maps below; it used to be the only one lacking the '/r' selector.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1493
1494
1495
class ParserException(Exception):
    """ Exception type raised for parser errors; carries a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1500
1501
1502class SimpleParser(object):
1503 """
1504 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1505 """
1506
    ## @name Parser state.
    ## Values for self.iState; __init__ puts a new parser into the kiCode state.
    ## @{
    kiCode              = 0;    ##< The state the parser starts in (presumably: scanning ordinary code - confirm).
    kiCommentMulti      = 1;    ##< Presumably: inside a multi-line comment - confirm against the line parsing code.
    ## @}
1512
1513 def __init__(self, sSrcFile, asLines, sDefaultMap):
1514 self.sSrcFile = sSrcFile;
1515 self.asLines = asLines;
1516 self.iLine = 0;
1517 self.iState = self.kiCode;
1518 self.sComment = '';
1519 self.iCommentLine = 0;
1520 self.aoCurInstrs = [];
1521
1522 assert sDefaultMap in g_dInstructionMaps;
1523 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1524
1525 self.cTotalInstr = 0;
1526 self.cTotalStubs = 0;
1527 self.cTotalTagged = 0;
1528
1529 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1530 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1531 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1532 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1533 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1534 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1535 self.fDebug = True;
1536
1537 self.dTagHandlers = {
1538 '@opbrief': self.parseTagOpBrief,
1539 '@opdesc': self.parseTagOpDesc,
1540 '@opmnemonic': self.parseTagOpMnemonic,
1541 '@op1': self.parseTagOpOperandN,
1542 '@op2': self.parseTagOpOperandN,
1543 '@op3': self.parseTagOpOperandN,
1544 '@op4': self.parseTagOpOperandN,
1545 '@oppfx': self.parseTagOpPfx,
1546 '@opmaps': self.parseTagOpMaps,
1547 '@opcode': self.parseTagOpcode,
1548 '@opcodesub': self.parseTagOpcodeSub,
1549 '@openc': self.parseTagOpEnc,
1550 '@opfltest': self.parseTagOpEFlags,
1551 '@opflmodify': self.parseTagOpEFlags,
1552 '@opflundef': self.parseTagOpEFlags,
1553 '@opflset': self.parseTagOpEFlags,
1554 '@opflclear': self.parseTagOpEFlags,
1555 '@ophints': self.parseTagOpHints,
1556 '@opdisenum': self.parseTagOpDisEnum,
1557 '@opmincpu': self.parseTagOpMinCpu,
1558 '@opcpuid': self.parseTagOpCpuId,
1559 '@opgroup': self.parseTagOpGroup,
1560 '@opunused': self.parseTagOpUnusedInvalid,
1561 '@opinvalid': self.parseTagOpUnusedInvalid,
1562 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1563 '@optest': self.parseTagOpTest,
1564 '@optestign': self.parseTagOpTestIgnore,
1565 '@optestignore': self.parseTagOpTestIgnore,
1566 '@opcopytests': self.parseTagOpCopyTests,
1567 '@oponly': self.parseTagOpOnlyTest,
1568 '@oponlytest': self.parseTagOpOnlyTest,
1569 '@opxcpttype': self.parseTagOpXcptType,
1570 '@opstats': self.parseTagOpStats,
1571 '@opfunction': self.parseTagOpFunction,
1572 '@opdone': self.parseTagOpDone,
1573 };
1574 for i in range(48):
1575 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1576 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1577
1578 self.asErrors = [];
1579
1580 def raiseError(self, sMessage):
1581 """
1582 Raise error prefixed with the source and line number.
1583 """
1584 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1585
1586 def raiseCommentError(self, iLineInComment, sMessage):
1587 """
1588 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1589 """
1590 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1591
1592 def error(self, sMessage):
1593 """
1594 Adds an error.
1595 returns False;
1596 """
1597 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1598 return False;
1599
1600 def errorComment(self, iLineInComment, sMessage):
1601 """
1602 Adds a comment error.
1603 returns False;
1604 """
1605 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1606 return False;
1607
1608 def printErrors(self):
1609 """
1610 Print the errors to stderr.
1611 Returns number of errors.
1612 """
1613 if self.asErrors:
1614 sys.stderr.write(u''.join(self.asErrors));
1615 return len(self.asErrors);
1616
1617 def debug(self, sMessage):
1618 """
1619 For debugging.
1620 """
1621 if self.fDebug:
1622 print('debug: %s' % (sMessage,));
1623
1624
1625 def addInstruction(self, iLine = None):
1626 """
1627 Adds an instruction.
1628 """
1629 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1630 g_aoAllInstructions.append(oInstr);
1631 self.aoCurInstrs.append(oInstr);
1632 return oInstr;
1633
1634 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1635 """
1636 Derives the mnemonic and operands from a IEM stats base name like string.
1637 """
1638 if oInstr.sMnemonic is None:
1639 asWords = sStats.split('_');
1640 oInstr.sMnemonic = asWords[0].lower();
1641 if len(asWords) > 1 and not oInstr.aoOperands:
1642 for sType in asWords[1:]:
1643 if sType in g_kdOpTypes:
1644 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1645 else:
1646 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1647 return False;
1648 return True;
1649
1650 def doneInstructionOne(self, oInstr, iLine):
1651 """
1652 Complete the parsing by processing, validating and expanding raw inputs.
1653 """
1654 assert oInstr.iLineCompleted is None;
1655 oInstr.iLineCompleted = iLine;
1656
1657 #
1658 # Specified instructions.
1659 #
1660 if oInstr.cOpTags > 0:
1661 if oInstr.sStats is None:
1662 pass;
1663
1664 #
1665 # Unspecified legacy stuff. We generally only got a few things to go on here.
1666 # /** Opcode 0x0f 0x00 /0. */
1667 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1668 #
1669 else:
1670 #if oInstr.sRawOldOpcodes:
1671 #
1672 #if oInstr.sMnemonic:
1673 pass;
1674
1675 #
1676 # Common defaults.
1677 #
1678
1679 # Guess mnemonic and operands from stats if the former is missing.
1680 if oInstr.sMnemonic is None:
1681 if oInstr.sStats is not None:
1682 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1683 elif oInstr.sFunction is not None:
1684 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1685
1686 # Derive the disassembler op enum constant from the mnemonic.
1687 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1688 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1689
1690 # Derive the IEM statistics base name from mnemonic and operand types.
1691 if oInstr.sStats is None:
1692 if oInstr.sFunction is not None:
1693 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1694 elif oInstr.sMnemonic is not None:
1695 oInstr.sStats = oInstr.sMnemonic;
1696 for oOperand in oInstr.aoOperands:
1697 if oOperand.sType:
1698 oInstr.sStats += '_' + oOperand.sType;
1699
1700 # Derive the IEM function name from mnemonic and operand types.
1701 if oInstr.sFunction is None:
1702 if oInstr.sMnemonic is not None:
1703 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1704 for oOperand in oInstr.aoOperands:
1705 if oOperand.sType:
1706 oInstr.sFunction += '_' + oOperand.sType;
1707 elif oInstr.sStats:
1708 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1709
1710 # Derive encoding from operands.
1711 if oInstr.sEncoding is None:
1712 if not oInstr.aoOperands:
1713 if oInstr.fUnused and oInstr.sSubOpcode:
1714 oInstr.sEncoding = 'ModR/M';
1715 else:
1716 oInstr.sEncoding = 'fixed';
1717 elif oInstr.aoOperands[0].usesModRM():
1718 if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
1719 oInstr.sEncoding = 'ModR/M+VEX';
1720 else:
1721 oInstr.sEncoding = 'ModR/M';
1722
1723 #
1724 # Apply default map and then add the instruction to all it's groups.
1725 #
1726 if not oInstr.aoMaps:
1727 oInstr.aoMaps = [ self.oDefaultMap, ];
1728 for oMap in oInstr.aoMaps:
1729 oMap.aoInstructions.append(oInstr);
1730
1731 #
1732 # Check the opstat value and add it to the opstat indexed dictionary.
1733 #
1734 if oInstr.sStats:
1735 if oInstr.sStats not in g_dAllInstructionsByStat:
1736 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1737 else:
1738 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1739 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1740
1741 #
1742 # Add to function indexed dictionary. We allow multiple instructions per function.
1743 #
1744 if oInstr.sFunction:
1745 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1746 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1747 else:
1748 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1749
1750 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1751 return True;
1752
1753 def doneInstructions(self, iLineInComment = None):
1754 """
1755 Done with current instruction.
1756 """
1757 for oInstr in self.aoCurInstrs:
1758 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1759 if oInstr.fStub:
1760 self.cTotalStubs += 1;
1761
1762 self.cTotalInstr += len(self.aoCurInstrs);
1763
1764 self.sComment = '';
1765 self.aoCurInstrs = [];
1766 return True;
1767
1768 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
1769 """
1770 Sets the sAttrib of all current instruction to oValue. If fOverwrite
1771 is False, only None values and empty strings are replaced.
1772 """
1773 for oInstr in self.aoCurInstrs:
1774 if fOverwrite is not True:
1775 oOldValue = getattr(oInstr, sAttrib);
1776 if oOldValue is not None:
1777 continue;
1778 setattr(oInstr, sAttrib, oValue);
1779
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib to oValue on all the
    current instructions.

    Arrays that are too short are padded with None up to and including
    iEntry.  Unless fOverwrite is True, only entries that are None are
    replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);

        # Grow the array so that iEntry is a valid index.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None,] * cMissing);

        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1791
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    hex byte ('0x..' or '0X..'), the words following 'Opcode' are joined with
    single spaces and stored as the 'sRawOldOpcodes' attribute of the current
    instructions via setInstrunctionAttrib() (i.e. without overwriting).  Any
    upper case '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' keyword and keep the opcode bytes that follow it.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];   # Normalize the prefix, keep the digits.
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1807
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction for the op-tag being parsed.

    Adds a new instruction (at the tag's line) if none is pending, bumps the
    tag count of every pending instruction, and counts each instruction as
    tagged the first time it receives a tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        cTags = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cTags;
        if cTags == 1:
            self.cTotalTagged += 1;     # First tag seen for this instruction.

    return self.aoCurInstrs[-1];
1817
1818 @staticmethod
1819 def flattenSections(aasSections):
1820 """
1821 Flattens multiline sections into stripped single strings.
1822 Returns list of strings, on section per string.
1823 """
1824 asRet = [];
1825 for asLines in aasSections:
1826 if asLines:
1827 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1828 return asRet;
1829
1830 @staticmethod
1831 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1832 """
1833 Flattens sections into a simple stripped string with newlines as
1834 section breaks. The final section does not sport a trailing newline.
1835 """
1836 # Typical: One section with a single line.
1837 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1838 return aasSections[0][0].strip();
1839
1840 sRet = '';
1841 for iSection, asLines in enumerate(aasSections):
1842 if asLines:
1843 if iSection > 0:
1844 sRet += sSectionSep;
1845 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1846 return sRet;
1847
1848
1849
1850 ## @name Tag parsers
1851 ## @{
1852
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The value is flattened, terminated with a '.' if needed, limited to 180
    characters and must consist of a single sentence.  On success it is
    stored as the brief description of the instruction and True is returned;
    otherwise the result of errorComment() is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that is followed by a space (dots inside words and
    # numbers are skipped).  It must be the final one, or we have more than
    # one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1882
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into a single string and appended to the description sections
    of the instruction.  Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1897
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match the mnemonic pattern (self.oReMnemonic).
    Because of prefixes, groups and whatnot, there are times when the
    mnemonic isn't that of an actual assembler mnemonic.

    Returns True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate it and check for overwrites.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
    else:
        oInstr.sMnemonic = sMnemonic;
        return True;
    return self.errorComment(iTagLine, sError);
1920
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location of the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  Returns
    True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # Note: this used to be misreported as an invalid 'where' value.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # Default location for this type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1970
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are separated by commas (lines and sections count as
    separators too); surrounding whitespace and empty entries are ignored.
    Each name must be a key of g_dInstructionMaps.  Returns True on success,
    otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries must be filtered out
    # before checking whether any value was given at all.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was given twice in this very tag; complain and carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2005
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value is expected.  'n/a' is stored as None, 'none' as is,
    and anything else must be a valid opcode byte (a two character value
    gets '0x' prepended first).  Values other than 'n/a' must also be keys
    of g_kdPrefixes.  Returns True on success, otherwise the result of
    errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Nothing but whitespace; there is no asPrefixes[0] to look at.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2043
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are a valid opcode byte, '/N', '11/N' and '!11/N', where
    N is a ModR/M reg field value, i.e. a single digit in the range 0 thru 7.
    Returns True on success, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The reg field of the ModR/M byte is three bits wide, so 8 is not valid.
    sRegDigits = '01234567';

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/')    and sOpcode[-1] in sRegDigits:
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/')  and sOpcode[-1] in sRegDigits:
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2073
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first element of the
    matching entry is what gets stored.  Returns True on success, otherwise
    the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Set it, unless already set.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2099
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or a valid opcode byte.  Returns True on success,
    otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnown and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, unless already set.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
2126
## Translates an EFLAGS tag into the name of the Instruction attribute that
# holds the flags given with that tag.
kdOpFlagToAttr = dict([
    ( '@opfltest',   'asFlTest', ),
    ( '@opflmodify', 'asFlModify', ),
    ( '@opflundef',  'asFlUndefined', ),
    ( '@opflset',    'asFlSet', ),
    ( '@opflclear',  'asFlClear', ),
]);
2135
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of keys of g_kdEFlagsMnemonics, or
    the single word 'none' for an empty list.  The resulting list is stored
    in the Instruction attribute that kdOpFlagToAttr associates with the tag,
    provided that attribute is still None.

    Returns True on success.  Returns False when one or more flags are
    invalid, and the result of errorComment() on an overwrite attempt.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag in range(len(asFlags)):
            sFlag = asFlags[iFlag];
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                # Keep going so that all the bad values are reported.
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, unless already set.
    asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
2167
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added to the hint dictionary of the
    instruction, with duplicates being reported but otherwise ignored.

    Returns True on success and False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry (strings are immutable, so
                    # indexing into sHint itself would raise TypeError).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2201
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Exactly one word is expected.  If there are more, this is reported and
    the first word is used; if there are none, False is returned.  The word
    must match self.oReDisEnum.  Returns True on success, otherwise False or
    the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the (first) word and check for overwrites before setting it.
    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        sError = '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' % (sTag, sDisEnum, self.oReDisEnum.pattern);
    elif oInstr.sDisEnum is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,);
    else:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, sError);
2230
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    Exactly one CPU name is expected.  If there are more, this is reported
    and the first one is used; if there are none, False is returned.  The
    name must be found in g_kdCpuNames.  An attempt to replace a previously
    set name with a different one is reported and the original is kept.

    Returns True on success, otherwise False or the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if len(asCpus) != 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
        if not asCpus:
            return False;               # Nothing to work with.

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The value must not be assigned before this point, or the
    # overwrite check below can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2260
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys of g_kdCpuIdFlags,
    or the single word 'none' for an empty list.  Valid flags are appended
    to the CPUID list of the instruction, with duplicates being reported but
    otherwise ignored.

    Returns True on success and False if any of the flags are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry (strings are immutable, so
                    # indexing into sCpuId itself would raise TypeError).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2294
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is expected, and it
    can only be set once per instruction.  Returns True on success,
    otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value, then validate it.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    sGroup = asGroups[0];
    if not self.oReGroupName.match(sGroup):
        sError = '%s: invalid group name: %s (valid: %s)' % (sTag, sGroup, self.oReGroupName.pattern);
    elif oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,);
    else:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, sError);
2320
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style is expected and it must be a key of g_kdInvalidStyles.
    The style can only be set once per instruction.  Returns True on
    success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value, then validate it.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));

    # Set it, flagging the instruction as unused or invalid where the tag says so.
    oInstr.sInvalidStyle = sStyle;
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2358
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section holds one test.  The words of a section are sorted into
    selectors, inputs and outputs using the '/' and '->' separators, each of
    which may appear only once.  The selectors and the input/output
    assignments are then validated, and the test is added to the instruction
    if everything checked out.  Problems are reported via errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs, then inputs, then selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  These checks must be on identity,
            # since two of the lists may well have equal content (e.g. both
            # still being empty) while being in the wrong state.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;      # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may be followed by an explicit ':type', otherwise the field default is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;      # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed okay.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2503
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is expected to equal the number of tests the instruction
    already has; a mismatch is reported, but the test is parsed regardless.

    Returns the result of parseTagOpTest().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number following '@optest', with or without brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2519
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  None
    of the arguments are used and True is always returned.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2531
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match either self.oReStatsName or
    self.oReFunctionName.  Valid references are appended to the copy job
    list of the instruction; invalid and duplicate ones are reported and
    skipped.  Returns True, unless no value was given in which case the
    result of errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  The jobs
    # are recorded as plain names here rather than resolved on the spot.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2560
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to
    g_aoOnlyTestInstructions unless it is already there.  Returns True on
    success, otherwise the result of errorComment().

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate and add instruction to only test dictionary.
    sValue = self.flattenAllSections(aasSections).strip();
    if not sValue:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;
    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));
2583
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type is expected and it must be a key of g_kdXcptTypes.  The
    type can only be set once per instruction.  Returns True on success,
    otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value, then validate it.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it, unless already set.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sType,));
2610
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The name must match self.oReFunctionName and can only be set once per
    instruction.  Returns True on success, otherwise the result of
    errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate it and check for overwrites.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sFunction, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sFunction,);
    else:
        oInstr.sFunction = sFunction;
        return True;
    return self.errorComment(iTagLine, sError);
2638
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The name must match self.oReStatsName and can only be set once per
    instruction.  Returns True on success, otherwise the result of
    errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate it and check for overwrites.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sStats,);
    else:
        oInstr.sStats = sStats;
        return True;
    return self.errorComment(iTagLine, sError);
2666
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result when the tag has no value,
    otherwise whatever errorComment() returns.
    """
    _ = iEndLine; # Unused; tag handlers are all called with the same arguments.

    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2679
2680 ## @}
2681
2682
def parseComment(self):
    """
    Parses the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected up front.
    Otherwise the text is split into lines; if the first line starts with
    'Opcode ' it is handed to parseCommentOldOpcode(), and every '@' tag is
    dispatched together with its text sections via self.dTagHandlers.  Text
    preceding the first tag is collected under the pseudo tag '@default'.

    Returns False if the comment was rejected, True if it was processed.
    """
    #
    # Nothing of interest unless the comment mentions 'Opcode' or has an '@'.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Strip leading blanks and asterisks off each line, then trim the blank
    # lines at both ends.  The comment start line is advanced once for each
    # blank line dropped at the top.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Old style comment: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Walk the lines collecting @op* tagged data.  A blank line starts a
    # new section within the current tag.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # A new tag starts here, so finish off the previous one first.
        #
        if not asCurSection and len(aasSections) > 1:
            aasSections.pop(-1);
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # Set up the new tag; text following it on the line opens its first section.
        #
        asSplit      = sLine.split(None, 1);
        sCurTag      = asSplit[0].lower();
        asCurSection = [asSplit[1],] if len(asSplit) > 1 else [];
        aasSections  = [asCurSection, ];
        iCurTagLine  = iLine;

    #
    # Finish off the last tag (iLine is the index of the last line here).
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text may not be mixed with @op* tags.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2783
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in sInvocation, following lines are taken from
    self.asLines, starting at index self.iLine (self.iLine itself is not
    modified).

    Returns a tuple, first element is the offset following the macro
    invocation (relative to sInvocation including any appended lines).  The
    second element is a list of macro arguments, where the zero'th is the
    macro name.  Arguments are split on commas at parenthesis depth one;
    string and character literals are not given any special treatment.

    On a bad macro name or when running out of lines, the self.error()
    result is returned instead of the tuple.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in following lines until there is a character at 'off'.  This
        # must loop, as an empty line adds nothing and indexing right after
        # appending it would raise an IndexError.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the outermost level delimit an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2825
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of the sMacro text is considered, and it only
    counts as an invocation if the next non-blank character is an opening
    parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Note: startswith() rather than indexing [0], as nothing at all may
    #       follow the macro name in sCode.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2835
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx which drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
2841
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in the given order, stopping at the first
    one for which an invocation is found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so macros after the first hit are never tried.
    oResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oResults if asRet is not None), None);
2851
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX,
    IEMOP_MNEMONIC2EX, IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty).
    Problems are reported via self.error() without stopping the processing.
    Invocations with IEMOPHINT_SKIP_PYTHON in sIemHints are ignored once
    the initial mnemonic checks are done.

    Returns True.
    """
    _ = sAsm; # Not used.

    #
    # Sanity check the mnemonic parameters of the invocation.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # The IEM hints may tell us to disregard this invocation altogether.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True;

    # Only the last instruction is updated for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic: set it if missing, otherwise it must agree.
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Operands: either none were given by @opN, or the count must agree.
    cKnownOperands = len(oInstr.aoOperands);
    if cKnownOperands != 0 and cKnownOperands != len(asOperands):
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: use what @opN gave us, or fall back on a default.
            if iOperand < len(oInstr.aoOperands):
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';

        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType));
            continue;

        oKnown = oInstr.aoOperands[iOperand];
        if oKnown.sWhere != sWhere or oKnown.sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        oForm = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oForm[0];
        elif oForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oForm, sForm));

        # The form may also dictate where each operand is located.
        asFormWheres = oForm[1];
        if asFormWheres is not None:
            if len(asFormWheres) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWheres):
                    oKnown = oInstr.aoOperands[iOperand];
                    if oKnown.sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oKnown.sWhere, sWhere, sForm,));

    # Stats: validate, then set if missing, otherwise it must agree.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Hints are simply merged with @ophints w/o checking anything.  The
    # disassembler hints are done first, then the IEM ones.
    for sHints, sPrefix, sParam in ( (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    return True;
2969
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are made up from the lower case mnemonic and the operands before
    the work is handed on to workerIemOpMnemonicEx.

    Returns the workerIemOpMnemonicEx result.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats += '_' + '_'.join(asOperands);
        sAsm   += ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2979
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if an instruction decoder function definition macro
    (FNIEMOP_DEF and friends) was found and processed, otherwise False (also
    when IEMOP_MNEMONIC* macros were processed).
    """
    # No macro invocation without an opening parenthesis past the first column.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        # Stubs have no body to wait for, so the instructions are completed here.
        if asArgs[0].find('STUB') > 0:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, [..,]] a_fDisHints, a_fIemHints)
    # The N operands sit between a_Lower (index 5) and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, [..,]] a_fDisHints, a_fIemHints)
    # The N operands sit between a_Lower (index 3) and the two hint arguments.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    return False;
3073
3074
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from index self.iLine on, switching between the code
    (kiCode) and multi-line comment (kiCommentMulti) states.  Code fragments
    are handed to checkCodeForMacro(), completed comments to parseComment(),
    and a line starting with '}' seen in the code state completes the
    current instructions.

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Scan the line piece by piece unless the first slash starts a
            # C++ line comment while we are in the code state.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() rather than sLine[0], so an empty line cannot raise IndexError.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3140
3141
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the result of SimpleParser.parse() (used as an error count by
    the caller).
    Raises Exception if the file cannot be opened or read.  A
    ParserException from the parser is printed and then re-raised.
    """
    #
    # Read sSrcFile into a line array.  Opening and reading are kept apart
    # so each failure gets its own message.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3172
3173
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry of an instruction is looked up in
    g_dAllInstructionsByStat first and then in g_dAllInstructionsByFunction,
    and the tests of the instruction(s) found are appended to the aoTests of
    the referencing instruction.  Unknown references, and references
    resolving to the instruction itself, are reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # A statistics name gives a single instruction, a function name a list.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3201
3202
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (there is nothing that can fail here).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3214
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the directory of this script.  After parsing,
    the @opcopytests references are resolved and the only-test filtering is
    applied.

    Returns True.  Exits the process with status 1 if any errors were counted.
    Raises exception on failure.
    """
    # (default map name, file name) in the order the files are parsed.
    atSources = (
        ( 'one', 'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1', 'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2', 'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3', 'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow', 'IEMAllInstructions3DNow.cpp.h'),
    );

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    for sDefaultMap, sName in atSources:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Errors end the process here instead of being raised as an exception.
    if cErrors != 0:
        sys.exit(1);
    return True;
3241
3242
3243
# Parse the instruction sources as soon as this module is loaded, i.e. also
# when it is merely imported.  Note that __parseAll() exits the process if any
# errors were counted.
__parseAll();
3245
3246
3247#
3248# Generators (may perhaps move later).
3249#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are ordered by encoding and
    lead opcode bytes.  For a map, a 'const DISOPCODE <table>[]' array and an
    AssertCompile() on its size are written to oDstFile.  Note that the loop
    currently stops after the first map ('for now').
    """
    # Start offsets of the columns within a table entry line.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # Every 16 entries start with a comment giving the upper nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Empty slots produce no output at all.
            if oInstr is None:
                continue;
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            # More than three operands requires the OPVEX macro.
            if len(oInstr.aoOperands) > 3:
                sMacro, cMaxOperands = 'OPVEX', 4;
            else:
                sMacro, cMaxOperands = 'OP', 3;
            assert len(oInstr.aoOperands) <= cMaxOperands;

            #
            # Format string: mnemonic, a space and the comma separated operands.
            #
            asOperandFmts = [];
            for oOperand in oInstr.aoOperands:
                sOperandFmt = g_kdOpTypes[oOperand.sType][2];
                if sOperandFmt[0] != '%': ## @todo remove upper() later.
                    sOperandFmt = sOperandFmt.upper();
                asOperandFmts.append(sOperandFmt);
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asOperandFmts:
                sFormat += ' ' + ','.join(asOperandFmts);
            sFormat += '",';
            asColumns = [ sFormat, ];

            #
            # Decoders.
            #
            iStart = len(asColumns);
            sEncoding = oInstr.sEncoding;
            if sEncoding is None:
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,');
            elif sEncoding in [ 'prefix', ]:
                asColumns.extend(['0,'] * len(oInstr.aoOperands));
            elif sEncoding in [ 'fixed' ]:
                pass;
            elif sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,');
            elif sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,');
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo
                # IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
                # IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
                # IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
                # IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
                # IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands, i.e.
            # those not already covered by a decoder column above.
            cDecoded = len(asColumns) - iStart;
            for oOperand in oInstr.aoOperands[cDecoded:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands, padded up to cMaxOperands parameters.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
            asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
            asColumns.extend(asParams);

            #
            # Flags: the DISOPTYPE_ defines of the hints, or zero if none.
            #
            asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
            asDefines = [sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')];
            asColumns.append((' | '.join(asDefines) or '0') + '),');

            #
            # Format the columns into a line.  Each column is padded up to
            # its offset, or separated by a single space if already past it.
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                cchPadding = aoffColumns[iColumn] - len(sLine);
                sLine += ' ' * cchPadding if cchPadding > 0 else ' ';
                sLine += sColumn;

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #    DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #    { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3413
# When run as a script, write the disassembler tables to standard output
# (the default destination of generateDisassemblerTables).
if __name__ == '__main__':
    generateDisassemblerTables();
3416
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette