VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66891

Last change on this file since 66891 was 66815, checked in by vboxsync, 8 years ago

IEM: s/V\([a-z]*\)ZxReg/V\1ZxReg/g because 'V' can only indicate a register so the 'Reg' part is superfluous.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 142.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66815 2017-05-05 19:35:39Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66815 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' builtin, so alias it to int; code further
# down in this file calls long(...) when parsing test values.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS constants: maps the X86_EFL_XXX constant name to its integer value.
## The trailing comment on each entry gives the corresponding C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5) - the OR of all the flag bits above except X86_EFL_1.
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of a g_kdX86EFlagsConstants entry.  A
## leading '!' on the constant name marks a mnemonic that stands for the flag
## being clear rather than set (see TestTypeEflags.get).
##
## NOTE(review): 'po' maps to PF set and 'pe' to PF clear.  Conventional
##               debugger notation has it the other way around (PE = PF set).
##               Confirm against the existing \@optest data before changing it,
##               as that data may have been written to match this table.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Maps the X86_CR0_XXX constant name to its integer value; the trailing
## comment on each entry gives the corresponding C expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## Maps the X86_CR4_XXX constant name to its integer value; the trailing
## comment on each entry gives the corresponding C expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX constant name to its integer value.  The last two
## entries are combined masks rather than single component bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits (the above | SSE).
};
178
179
## \@op[1-4] locations
## The keys are the valid location names; these are also the values used in
## field 1 of the g_kdOpTypes entries and in the g_kdIemForms location lists.
## Every value is an empty list here.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
200
## \@op[1-4] types
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Several keys carry an _RO, _WO or _RW suffix and share the fields of the
## unsuffixed type; presumably these mark read-only / write-only / read-write
## access - confirm against the users of this table.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),

    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
};
302
303# IDX_ParseFixedReg
304# IDX_ParseVexDest
305
306
## IEMFORM_XXX mappings.
##
## Maps the form name to a tuple of:
## - 0: the encoding name (one of the g_kdEncodings keys).
## - 1: the list of operand locations (g_kdOpLocations keys) in operand
##      order, or None for the 'FIXED' form.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'M': ( 'ModR/M', [ 'rm', ], ),
    'M_REG': ( 'ModR/M', [ 'rm', ], ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], ),
    'R': ( 'ModR/M', [ 'reg', ], ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED': ( 'fixed', None, )
};
335
## \@oppfx values.
## The keys are the accepted values ('none' or a prefix byte as a lower case
## hex string); every value is an empty list here.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
343
## Special \@opcode tag values.
## The keys are the accepted non-hex opcode specifications; every value is an
## empty list here.
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
353
## Special \@opcodesub tag values.
## Each value is a one-element list holding the canonical form of the key
## (None for 'none'), so the short aliases resolve to the long spelling.
g_kdSubOpcodes = {
    'none': [ None, ],
    '11 mr/reg': [ '11 mr/reg', ],
    '11': [ '11 mr/reg', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', ],
    '!11': [ '!11 mr/reg', ], ##< alias
};
362
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX constant name, or
## None for 'prefix' which has no such constant listed.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
    'prefix': [ None, ], ##< Prefix
};
370
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted style names; every value is an empty list here.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
379
## CPU names.
## The keys are the recognized CPU names; every value is an empty tuple here.
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
387
## \@opcpuid
## Maps the CPUID feature mnemonic to the name of an X86_CPUID_XXX constant.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the constant names DTES64 rather than PCLMUL - looks like
    #               it is off by one feature bit; confirm against the x86 header.
    'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64',
    # NOTE(review): the constant names CPLDS rather than MONITOR - confirm.
    'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    # NOTE(review): the constant names TM2 rather than SMX - confirm.
    'smx': 'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse41': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD feature constants.
    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
432
## \@ophints values.
## Maps the hint name to the name of a DISOPTYPE_XXX constant.  The last two
## hints map to an empty string, i.e. no DISOPTYPE_XXX constant is listed for
## them.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                 ## (only in 16 & 32 bits mode!)
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_op_size': '', ##< Ignores both operand size prefixes.
    'lock_allowed': '', ##< Lock prefix allowed.
};
467
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted exception type names; every value is an empty
## list here.
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '6': [],
    '7': [],
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
501
502
503def _isValidOpcodeByte(sOpcode):
504 """
505 Checks if sOpcode is a valid lower case opcode byte.
506 Returns true/false.
507 """
508 if len(sOpcode) == 4:
509 if sOpcode[:2] == '0x':
510 if sOpcode[2] in '0123456789abcdef':
511 if sOpcode[3] in '0123456789abcdef':
512 return True;
513 return False;
514
515
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): the comments below say 'vvvvv'; the VEX/XOP field that
    ##               selects the opcode map is usually called 'mmmmm' - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Constructs the map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case hex strings (None means no lead bytes), sSelector a kdSelectors
        key, sEncoding a kdEncodings key and sDisParse either None or a string
        starting with 'IDX_Parse'.  The arguments are checked with asserts.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               ##< The instructions in this map (list of Instruction).
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the selectors that imply a particular
        modrm.mod value, the mod bits (0xc0) of the byte are asserted to
        agree with the selector.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note: modrm.mod occupies bits 7:6, so mod == 0y11 is 0xc0 after
        #       masking with 0xc0 (comparing with 0xc can never match).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, convert the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        sName: 'm' and 'r' words and two digit lower case hex words are kept
        as '_<word>' suffixes, all other words are appended with the first
        letter capitalized and 'grp'/'map' turned into 'Grp'/'Map'.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
656
657
class TestType(object):
    """
    Test value type.

    The base class handles integer like values given as decimal or
    hexadecimal strings.  The fUnsigned constructor parameter sets the default
    for zero vs sign extending the value; a leading '+' or '-' on the value
    always selects sign extending.
    """

    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Raised by get() when the value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## Maps each lower case hex digit to its bitwise inverse (ascii ~ operator).
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue to the shortest natural sized little endian byte
        representation.

        Returns ((fSignExtend, bytearray), ) in this class.  Subclasses may
        also return ((fSignExtend, bytearray), (fSignExtend, bytearray), ) for
        AND+OR pairs, where the first entry is what to AND with the field and
        the second the one to OR with.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts the '0x' prefix by one.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offPrefix   = 1 if fHasSign else 0;
        fHex        = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits.  For negative values we take the digits of
        # (-n - 1) and invert each of them, which gives the two's complement.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set when sign extending.
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex;

        # Find the natural size: the first normal size that fits, or a
        # multiple of the largest one if the value is bigger than that.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits > cMaxDigits:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)
        else:
            cNaturalDigits = next((cb * 2 for cb in self.acbSizes if cDigits <= cb * 2), cMaxDigits);

        # Pad on the left up to the natural size.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Convert to a bytearray, least significant byte first.
        abValue = bytearray();
        for offHex in range(len(sHex) - 2, -1, -2):
            abValue.append(int(sHex[offHex : offHex + 2], 16));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for the base class.
        """
        _ = sValue;
        return False;
782
783
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of g_kdEFlagsMnemonics keys.
    """

    ## The mnemonics that stand for a cleared flag, i.e. the g_kdEFlagsMnemonics
    ## keys whose constant has the '!' prefix.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into byte representation(s), see TestType.get.

        Returns a (clear-mask, set-mask) pair if any of the flags is a
        cleared-flag mnemonic, otherwise just the set-mask.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any cleared-flag mnemonic, in which case
        get() returns an AND+OR pair.

        The flags are matched as whole comma separated tokens.  A substring
        search would wrongly match 'pl' inside 'iopl' and trip the
        consistency asserts in get().
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
819
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose flags are looked up in a
    constant dictionary (name -> value).

    The value is a comma separated list of flag names.  Each name is upper
    cased and prefixed with sConstantPrefix to form the dictionary key.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of all the named flags and returns the byte
        representation of the result, see TestType.get.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sKey  = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sKey, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined = fCombined | fBits;
        sHexValue = '0x%x' % (fCombined,);
        return TestType.get(self, sHexValue);
840
841
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance only records the four strings it is given: the field being
    modified (a kdFields key), the assignment operator (one of kasOperators),
    the value and the value type.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Historic misspelling of 'xmm15.dw0' (underscore instead of dot);
        # kept as an alias so anything using the old spelling keeps working.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one modifier.

        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be str.  These are checked with asserts only, so a
        violation raises AssertionError unless assertions are disabled.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1107
1108
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet; the tables
    below list the variables, the values each accepts and the shorthand
    predicates that expand into such expressions.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16': 'size_o16',
            'o32': 'size_o32',
            'o64': 'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64': 'code_64bit',
            '32': 'code_32bit',
            '16': 'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':  'paging_on',
            'off': 'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };

    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':        'size==o16',
        'o32':        'size==o32',
        'o64':        'size==o64',
        # Execution ring.
        'ring0':      'ring==0',
        '!ring0':     'ring==1..3',
        'ring1':      'ring==1',
        'ring2':      'ring==2',
        'ring3':      'ring==3',
        'user':       'ring==3',
        'supervisor': 'ring==0..2',
        # CPU mode.
        'real':       'mode==real',
        'prot':       'mode==prot',
        'long':       'mode==long',
        'v86':        'mode==v86',
        'smm':        'mode==smm',
        'vmx':        'mode==vmx',
        'svm':        'mode==svm',
        # Paging.
        'paging':     'paging==on',
        '!paging':    'paging==off',
        # CPU vendor.
        'amd':        'vendor==amd',
        '!amd':       'vendor!=amd',
        'intel':      'vendor==intel',
        '!intel':     'vendor!=intel',
        'via':        'vendor==via',
        '!via':       'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Records the selector triplet.

        The variable must be a kdVariables key, the operator one of
        kasCompareOps and the value valid for that variable (asserted).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1197
1198
1199class InstructionTest(object):
1200 """
1201 Instruction test.
1202 """
1203
1204 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1205 self.oInstr = oInstr; # type: InstructionTest
1206 self.aoInputs = []; # type: list(TestInOut)
1207 self.aoOutputs = []; # type: list(TestInOut)
1208 self.aoSelectors = []; # type: list(TestSelector)
1209
1210 def toString(self, fRepr = False):
1211 """
1212 Converts it to string representation.
1213 """
1214 asWords = [];
1215 if self.aoSelectors:
1216 for oSelector in self.aoSelectors:
1217 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1218 asWords.append('/');
1219
1220 for oModifier in self.aoInputs:
1221 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1222
1223 asWords.append('->');
1224
1225 for oModifier in self.aoOutputs:
1226 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1227
1228 if fRepr:
1229 return '<' + ' '.join(asWords) + '>';
1230 return ' '.join(asWords);
1231
1232 def __str__(self):
1233 """ Provide string represenation. """
1234 return self.toString(False);
1235
1236 def __repr__(self):
1237 """ Provide unambigious string representation. """
1238 return self.toString(True);
1239
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        # Both values must be known table keys; the offending value is used
        # as the assertion message.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided solely by the first letter of the operand type.
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1254
1255
1256
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain attribute container describing one instruction, plus helpers for
    stringification, opcode byte decoding and EFLAGS mask calculation.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record; sSrcFile and iLine are stored as
        the source file and the line the record was created for.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' pairs separated by '; ', leaving out fields
        whose value is None.  The result is wrapped in angle brackets when
        fRepr is set.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:     aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:     aasFields.append(['hints', ','.join(self.dHints)]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:   aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:    aasFields.append(['unused', 'True']);
        if self.fInvalid:   aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:      aasFields.append(['fStub', 'True']);
        if self.fUdStub:    aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:    aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Identity test: a value is only skipped when it really is None,
        # empty lists and strings are still printed.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Recognized forms (r being a single digit):
            0xNN    - plain hex value, returned as is.
            /r      - r shifted into bits 3 thru 5.
            11/r    - same as /r with 0xc0 or'ed in.
            !11/r   - same as /r with 0x80 or'ed in.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  The spec is two characters long ('/' + digit); testing
        # for a length of four here would make this form impossible to match.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        whose value is then looked up in g_kdX86EFlagsConstants.  None and
        empty lists yield zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1430
1431
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(str, Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(str, list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Keyed by map name; the name is also passed as the first argument of each
## InstructionMap.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Like the other group 1 maps, 0x80 is selected by ModR/M.reg, so it gets
    # the same '/r' selector as grp1_81 thru grp1_83.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1504
1505
1506
class ParserException(Exception):
    """ Parser exception carrying a single, already formatted message. """

    def __init__(self, sMessage):
        # Hand the message to the base class so str() and args work as usual.
        super(ParserException, self).__init__(sMessage);
1511
1512
1513class SimpleParser(object):
1514 """
1515 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1516 """
1517
1518 ## @name Parser state.
1519 ## @{
1520 kiCode = 0;
1521 kiCommentMulti = 1;
1522 ## @}
1523
1524 def __init__(self, sSrcFile, asLines, sDefaultMap):
1525 self.sSrcFile = sSrcFile;
1526 self.asLines = asLines;
1527 self.iLine = 0;
1528 self.iState = self.kiCode;
1529 self.sComment = '';
1530 self.iCommentLine = 0;
1531 self.aoCurInstrs = [];
1532
1533 assert sDefaultMap in g_dInstructionMaps;
1534 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1535
1536 self.cTotalInstr = 0;
1537 self.cTotalStubs = 0;
1538 self.cTotalTagged = 0;
1539
1540 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1541 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1542 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1543 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1544 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1545 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1546 self.fDebug = True;
1547
1548 self.dTagHandlers = {
1549 '@opbrief': self.parseTagOpBrief,
1550 '@opdesc': self.parseTagOpDesc,
1551 '@opmnemonic': self.parseTagOpMnemonic,
1552 '@op1': self.parseTagOpOperandN,
1553 '@op2': self.parseTagOpOperandN,
1554 '@op3': self.parseTagOpOperandN,
1555 '@op4': self.parseTagOpOperandN,
1556 '@oppfx': self.parseTagOpPfx,
1557 '@opmaps': self.parseTagOpMaps,
1558 '@opcode': self.parseTagOpcode,
1559 '@opcodesub': self.parseTagOpcodeSub,
1560 '@openc': self.parseTagOpEnc,
1561 '@opfltest': self.parseTagOpEFlags,
1562 '@opflmodify': self.parseTagOpEFlags,
1563 '@opflundef': self.parseTagOpEFlags,
1564 '@opflset': self.parseTagOpEFlags,
1565 '@opflclear': self.parseTagOpEFlags,
1566 '@ophints': self.parseTagOpHints,
1567 '@opdisenum': self.parseTagOpDisEnum,
1568 '@opmincpu': self.parseTagOpMinCpu,
1569 '@opcpuid': self.parseTagOpCpuId,
1570 '@opgroup': self.parseTagOpGroup,
1571 '@opunused': self.parseTagOpUnusedInvalid,
1572 '@opinvalid': self.parseTagOpUnusedInvalid,
1573 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1574 '@optest': self.parseTagOpTest,
1575 '@optestign': self.parseTagOpTestIgnore,
1576 '@optestignore': self.parseTagOpTestIgnore,
1577 '@opcopytests': self.parseTagOpCopyTests,
1578 '@oponly': self.parseTagOpOnlyTest,
1579 '@oponlytest': self.parseTagOpOnlyTest,
1580 '@opxcpttype': self.parseTagOpXcptType,
1581 '@opstats': self.parseTagOpStats,
1582 '@opfunction': self.parseTagOpFunction,
1583 '@opdone': self.parseTagOpDone,
1584 };
1585 for i in range(48):
1586 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1587 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1588
1589 self.asErrors = [];
1590
1591 def raiseError(self, sMessage):
1592 """
1593 Raise error prefixed with the source and line number.
1594 """
1595 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1596
1597 def raiseCommentError(self, iLineInComment, sMessage):
1598 """
1599 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1600 """
1601 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1602
1603 def error(self, sMessage):
1604 """
1605 Adds an error.
1606 returns False;
1607 """
1608 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1609 return False;
1610
1611 def errorComment(self, iLineInComment, sMessage):
1612 """
1613 Adds a comment error.
1614 returns False;
1615 """
1616 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1617 return False;
1618
1619 def printErrors(self):
1620 """
1621 Print the errors to stderr.
1622 Returns number of errors.
1623 """
1624 if self.asErrors:
1625 sys.stderr.write(u''.join(self.asErrors));
1626 return len(self.asErrors);
1627
1628 def debug(self, sMessage):
1629 """
1630 For debugging.
1631 """
1632 if self.fDebug:
1633 print('debug: %s' % (sMessage,));
1634
1635
1636 def addInstruction(self, iLine = None):
1637 """
1638 Adds an instruction.
1639 """
1640 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1641 g_aoAllInstructions.append(oInstr);
1642 self.aoCurInstrs.append(oInstr);
1643 return oInstr;
1644
1645 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1646 """
1647 Derives the mnemonic and operands from a IEM stats base name like string.
1648 """
1649 if oInstr.sMnemonic is None:
1650 asWords = sStats.split('_');
1651 oInstr.sMnemonic = asWords[0].lower();
1652 if len(asWords) > 1 and not oInstr.aoOperands:
1653 for sType in asWords[1:]:
1654 if sType in g_kdOpTypes:
1655 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1656 else:
1657 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1658 return False;
1659 return True;
1660
1661 def doneInstructionOne(self, oInstr, iLine):
1662 """
1663 Complete the parsing by processing, validating and expanding raw inputs.
1664 """
1665 assert oInstr.iLineCompleted is None;
1666 oInstr.iLineCompleted = iLine;
1667
1668 #
1669 # Specified instructions.
1670 #
1671 if oInstr.cOpTags > 0:
1672 if oInstr.sStats is None:
1673 pass;
1674
1675 #
1676 # Unspecified legacy stuff. We generally only got a few things to go on here.
1677 # /** Opcode 0x0f 0x00 /0. */
1678 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1679 #
1680 else:
1681 #if oInstr.sRawOldOpcodes:
1682 #
1683 #if oInstr.sMnemonic:
1684 pass;
1685
1686 #
1687 # Common defaults.
1688 #
1689
1690 # Guess mnemonic and operands from stats if the former is missing.
1691 if oInstr.sMnemonic is None:
1692 if oInstr.sStats is not None:
1693 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1694 elif oInstr.sFunction is not None:
1695 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1696
1697 # Derive the disassembler op enum constant from the mnemonic.
1698 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1699 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1700
1701 # Derive the IEM statistics base name from mnemonic and operand types.
1702 if oInstr.sStats is None:
1703 if oInstr.sFunction is not None:
1704 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1705 elif oInstr.sMnemonic is not None:
1706 oInstr.sStats = oInstr.sMnemonic;
1707 for oOperand in oInstr.aoOperands:
1708 if oOperand.sType:
1709 oInstr.sStats += '_' + oOperand.sType;
1710
1711 # Derive the IEM function name from mnemonic and operand types.
1712 if oInstr.sFunction is None:
1713 if oInstr.sMnemonic is not None:
1714 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1715 for oOperand in oInstr.aoOperands:
1716 if oOperand.sType:
1717 oInstr.sFunction += '_' + oOperand.sType;
1718 elif oInstr.sStats:
1719 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1720
1721 # Derive encoding from operands.
1722 if oInstr.sEncoding is None:
1723 if not oInstr.aoOperands:
1724 if oInstr.fUnused and oInstr.sSubOpcode:
1725 oInstr.sEncoding = 'ModR/M';
1726 else:
1727 oInstr.sEncoding = 'fixed';
1728 elif oInstr.aoOperands[0].usesModRM():
1729 if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
1730 oInstr.sEncoding = 'ModR/M+VEX';
1731 else:
1732 oInstr.sEncoding = 'ModR/M';
1733
1734 #
1735 # Apply default map and then add the instruction to all it's groups.
1736 #
1737 if not oInstr.aoMaps:
1738 oInstr.aoMaps = [ self.oDefaultMap, ];
1739 for oMap in oInstr.aoMaps:
1740 oMap.aoInstructions.append(oInstr);
1741
1742 #
1743 # Check the opstat value and add it to the opstat indexed dictionary.
1744 #
1745 if oInstr.sStats:
1746 if oInstr.sStats not in g_dAllInstructionsByStat:
1747 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1748 else:
1749 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1750 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1751
1752 #
1753 # Add to function indexed dictionary. We allow multiple instructions per function.
1754 #
1755 if oInstr.sFunction:
1756 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1757 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1758 else:
1759 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1760
1761 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1762 return True;
1763
1764 def doneInstructions(self, iLineInComment = None):
1765 """
1766 Done with current instruction.
1767 """
1768 for oInstr in self.aoCurInstrs:
1769 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1770 if oInstr.fStub:
1771 self.cTotalStubs += 1;
1772
1773 self.cTotalInstr += len(self.aoCurInstrs);
1774
1775 self.sComment = '';
1776 self.aoCurInstrs = [];
1777 return True;
1778
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of every current instruction to oValue.

    Unless fOverwrite is True, an instruction is only updated when its
    current attribute value is None; any other existing value (including
    an empty string) is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1790
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib to oValue for every
    current instruction.

    The array is padded in place with None entries until iEntry is a valid
    index.  Unless fOverwrite is True, the entry is only written when it
    currently holds None.
    """
    for oCurInstr in self.aoCurInstrs:
        aoValues = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
1802
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex value ('0x' or '0X' prefixed), everything after the 'Opcode' word is
    recorded as the 'sRawOldOpcodes' attribute of the current instructions
    (via setInstrunctionAttrib, i.e. without overwriting an existing value).
    Any '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and (   asWords[1].startswith('0x')
            or asWords[1].startswith('0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodeWords = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodeWords.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
1818
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    When no instruction is pending, one is added via addInstruction() using
    line self.iCommentLine + iTagLine.  The tag counter of every current
    instruction is bumped, and self.cTotalTagged is incremented for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1828
1829 @staticmethod
1830 def flattenSections(aasSections):
1831 """
1832 Flattens multiline sections into stripped single strings.
1833 Returns list of strings, on section per string.
1834 """
1835 asRet = [];
1836 for asLines in aasSections:
1837 if asLines:
1838 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1839 return asRet;
1840
1841 @staticmethod
1842 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1843 """
1844 Flattens sections into a simple stripped string with newlines as
1845 section breaks. The final section does not sport a trailing newline.
1846 """
1847 # Typical: One section with a single line.
1848 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1849 return aasSections[0][0].strip();
1850
1851 sRet = '';
1852 for iSection, asLines in enumerate(aasSections):
1853 if asLines:
1854 if iSection > 0:
1855 sRet += sSectionSep;
1856 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1857 return sRet;
1858
1859
1860
1861 ## @name Tag parsers
1862 ## @{
1863
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened into one string, gets a terminating '.' if it
    lacks one, must not exceed 180 characters and must consist of a single
    sentence.  On success it is stored in oInstr.sBrief and True is
    returned; otherwise the result of errorComment() is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a sentence ending dot (one followed by a space) ahead of the
    # final character; dots inside words or numbers are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1893
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each flattened section is
    appended to oInstr.asDescSections.  Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
1908
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic (a valid C identifier
    string).  Because of prefixes, groups and whatnot, there are times when
    the mnemonic isn't that of an actual assembler mnemonic.

    Returns True after storing the value in oInstr.sMnemonic, otherwise the
    result of errorComment().
    """
    _ = iEndLine;
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sMnemonic = self.flattenAllSections(aasSections);

    if not self.oReMnemonic.match(sMnemonic):
        fRc = self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
    elif oInstr.sMnemonic is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                          % (sTag, oInstr.sMnemonic, sMnemonic,));
    else:
        oInstr.sMnemonic = sMnemonic;
        fRc = True;
    return fRc;
1931
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations; when
    omitted it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of sTag.  Returns
    True after storing the operand in oInstr.aoOperands, otherwise the
    result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # The default location for this type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1981
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every comma separated name (surrounding whitespace is ignored) must be a
    key of g_dInstructionMaps.  The corresponding map objects are appended
    to oInstr.aoMaps.  Returns True on success, otherwise the result of
    errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so the emptiness check must be done on
    # the flattened string.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if not sFlattened.strip():
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map given twice in this tag; reported but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2016
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None.  Any other value is lower-cased, gets a '0x'
    prefix if it is only two characters long, and must be a key of
    g_kdPrefixes.  Returns True after storing the value in oInstr.sPrefix,
    otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;   # Allow the byte value without the '0x'.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2054
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are a value passing _isValidOpcodeByte(), '/N', '11/N'
    and '!11/N', where N is a single digit in the range 0..7 (the ModR/M
    reg field is three bits wide).  Returns True after storing the value in
    oInstr.sOpcode, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sRegPrefix in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sRegPrefix) + 1 \
               and sOpcode.startswith(sRegPrefix) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2084
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; the first element
    of the matching entry is what gets stored in oInstr.sSubOpcode.  Returns
    True on success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate the flattened value and translate it.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to overwrite an earlier value.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2110
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings or of
    g_dInstructionMaps, or if it passes _isValidOpcodeByte().  Returns True
    after storing it in oInstr.sEncoding, otherwise the result of
    errorComment().
    """
    _ = iEndLine;
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sEncoding = self.flattenAllSections(aasSections);

    # Validate the value.
    fKnown = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnown and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it unless already set.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
2137
## Maps each EFlags tag to the name of the Instruction attribute it sets.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};

def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of entries from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  All invalid
    entries are reported before giving up.  The resulting list is stored in
    the instruction attribute self.kdOpFlagToAttr associates with sTag,
    provided that attribute is still None.

    Returns True on success, False if any entry was invalid, otherwise the
    result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to overwrite an earlier specification.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2178
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    means no hints.  All invalid hints are reported before giving up.  Valid
    hints are added as keys to oInstr.dHints; duplicates are reported but
    are not fatal.

    Returns True on success and False if any hint was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (str.split() without arguments yields whitespace free words, so there
    # is no need for stripping the individual hints.)
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2212
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  If more than one word is given an
    error is reported and only the first word is considered.  Returns True
    after storing the value in oInstr.sDisEnum, False if no value was given,
    otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the (first) word.
    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it unless already set.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2241
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  If more than one word is given
    an error is reported and only the first word is considered.  The value
    is stored in oInstr.sMinCpu unless a different value was already set, in
    which case an error is reported and the existing value is kept.

    Returns True if a valid CPU name was given, otherwise the result of
    errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The instruction must not be touched before this check, or
    # conflicting values would go unnoticed.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2271
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys, or
    the single word 'none' (any case) for an empty list.  All invalid
    entries are reported before giving up.  Valid entries are appended to
    oInstr.asCpuIds; duplicates are reported but are not fatal.

    Returns True on success and False if any entry was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (str.split() without arguments yields whitespace free words, so there
    # is no need for stripping the individual entries.)
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2305
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is expected.  Returns
    True after storing it in oInstr.sGroup, otherwise the result of
    errorComment().
    """
    _ = iEndLine;
    oInstr   = self.ensureInstructionForOpTag(iTagLine);
    asGroups = self.flattenAllSections(aasSections).split();

    # Validate the value.
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    sGroup = asGroups[0];
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Set it unless already set.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
2331
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style from g_kdInvalidStyles is expected.  It is stored in
    oInstr.sInvalidStyle, and oInstr.fUnused or oInstr.fInvalid is set when
    sTag is '@opunused' or '@opinvalid' respectively.  Returns True on
    success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr   = self.ensureInstructionForOpTag(iTagLine);
    asStyles = self.flattenAllSections(aasSections).split();

    # Validate the value.
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be used per instruction.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));

    # Set it along with the flag implied by the tag.
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
2369
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  Tests that
    parse without problems are appended to oInstr.aoTests, failures are
    reported via errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs first, then inputs
        # after seeing the '->', and finally selectors after seeing the '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current array must be checked by
            # identity, since the (distinct) lists compare equal by value
            # whenever they hold the same words, e.g. when both are empty.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to full conditions.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional and defaults to that of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if everything parsed fine.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2514
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number taken from the tag must equal the number of tests the
    instruction already has, otherwise an error is reported.  The value is
    parsed by parseTagOpTest() either way, and its result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the test number, with or without the square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2530
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    All the arguments are ignored and True is always returned.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2542
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid and duplicate references are
    reported but are not fatal.  Returns True unless no reference was given,
    in which case the result of errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2571
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already there.  Returns True on success; if a value was given, the
    result of errorComment() is returned instead.

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list, avoiding duplicates.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2594
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type from g_kdXcptTypes is expected.  Returns True after
    storing it in oInstr.sXcptType, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr  = self.ensureInstructionForOpTag(iTagLine);
    asTypes = self.flattenAllSections(aasSections).split();

    # Validate the value.
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it unless already set.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sType,));
2621
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The value must match self.oReFunctionName.  Returns True after storing
    it in oInstr.sFunction, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sFunction = self.flattenAllSections(aasSections);

    if not self.oReFunctionName.match(sFunction):
        fRc = self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                          % (sTag, sFunction, self.oReFunctionName.pattern));
    elif oInstr.sFunction is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                          % (sTag, oInstr.sFunction, sFunction,));
    else:
        oInstr.sFunction = sFunction;
        fRc = True;
    return fRc;
2649
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the instruction.  The name is
    normally picked up from the IEMOP_MNEMONIC macro invocation, or generated
    from the mnemonic and operands.

    Setting it by hand may be necessary for instructions whose implementation
    does not follow immediately or which are not implemented yet.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole tag value, flattened, is the candidate name.
    sNewStats = self.flattenAllSections(aasSections);

    if not self.oReStatsName.match(sNewStats):
        # Does not look like a statistics name.
        fRc = self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                % (sTag, sNewStats, self.oReStatsName.pattern));
    elif oInstr.sStats is not None:
        # The name may only be given once per instruction.
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                % (sTag, oInstr.sStats, sNewStats,));
    else:
        oInstr.sStats = sNewStats;
        fRc = True;

    _ = iEndLine;
    return fRc;
2677
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions, or of self.errorComment
    if the tag was given a value.
    """
    _ = iEndLine;

    # The tag takes no value, so anything but an empty string is an error.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2690
2691 ## @}
2692
2693
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, leading asterisks and whitespace are
    stripped, and the text is cut into tags (lines starting with '@') with
    their blank-line separated sections.  Each tag found in
    self.dTagHandlers is passed to its handler; unknown or apparently
    misspelled tags are reported thru self.errorComment.  Text before the
    first tag is collected under the pseudo tag '@default'.

    An old style comment starting with 'Opcode ' is additionally handed to
    self.parseCommentOldOpcode.

    Returns False if the comment holds neither 'Opcode' nor '@' and was
    therefore ignored, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.  (The check above makes
    # sure at least one non-empty line remains.)
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;     # Keep the line number in sync with asLines[0].
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    # The state shared with the helper lives in a dictionary so the helper
    # can update it without 'nonlocal' (keeps python 2 compatibility).
    #
    dState = { 'cOpTags': 0, 'sFlatDefault': None, };

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """ Dispatches one completed tag; used for all tags, including the last. """
        # A blank line right before the next tag leaves an empty last section.
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            dState['cOpTags'] += 1;
        elif sTag in ('@encoding', '@opencoding'):
            # Must come before the generic '@op' check or '@opencoding' never gets here.
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            dState['sFlatDefault'] = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));

    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # A blank line ends the current section and starts a new one.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            processTag(sCurTag, aasSections, iCurTagLine, iLine);

            #
            # New tag.
            #
            asSplit      = sLine.split(None, 1);
            sCurTag      = asSplit[0].lower();
            asCurSection = [asSplit[1],] if len(asSplit) > 1 else [];
            aasSections  = [ asCurSection, ];
            iCurTagLine  = iLine;

    #
    # Process the final tag.  As before, the end line passed on is the index
    # of the last comment line.
    #
    processTag(sCurTag, aasSections, iCurTagLine, iLine);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if dState['cOpTags'] > 0 and dState['sFlatDefault']:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (dState['sFlatDefault'],));

    return True;
2794
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in sInvocation, following lines are taken from
    self.asLines, starting at index self.iLine, and appended until the
    invocation is complete.  Neither self.iLine nor self.asLines is modified.

    Arguments are split on commas at the outermost parenthesis level only,
    so nested calls stay within one argument.  String literals receive no
    special treatment.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    NOTE(review): the invalid-name and end-of-file paths return whatever
    self.error returns rather than such a tuple - confirm that callers
    unpacking the result can cope with that.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines when running dry.  This is a loop rather than
        # an 'if' so that an empty continuation line cannot cause an
        # out-of-range index below.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch in (',', ')'):
            if cDepth == 1:
                # End of an argument at the outermost level.
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2836
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is followed
    by an opening parenthesis (whitespace in between is fine), parses the
    invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found, with the offset adjusted to be relative to the start of sCode.

    NOTE(review): the result of parseMacroInvocation is unpacked as a pair;
    its error paths return the result of self.error instead - confirm.
    """
    offHit = sCode.find(sMacro);
    # Use startswith rather than indexing so that a macro name at the very
    # end of sCode (nothing but whitespace following) is simply not a hit.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2846
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx which drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
2852
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the order given and stops at the
    first one found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no more macros are tried after the first hit.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oCandidates if asArgs is not None), None);
2862
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what has already been
    recorded for the last instruction in self.aoCurInstrs (an instruction is
    added if there is none).  Attributes not yet set - mnemonic, operands,
    encoding and statistics name - are taken from the macro, and the hints
    are merged into the instruction's dHints.  Problems are reported thru
    self.error.

    sMacro is the macro name and is only used in messages.  sStats, sForm,
    sUpper, sLower, sDisHints and sIemHints are the corresponding a_Xxx macro
    arguments, asOperands the list of a_OpN arguments.  sAsm is not used.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro is -1 until a mnemonic macro has been seen for the instruction.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    # Either no operands have been recorded yet, or the count must match the macro.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of the g_kdOpTypes value is used as the operand location.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        # Append the operand if it is the next missing one, otherwise compare.
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry [0] of the form is compared with the encoding, entry [1] is
        # either None or the operand locations checked further down.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Each hint list is a '|' separated expression; '0' means no hints.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
2980
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing everything to workerIemOpMnemonicEx.

    Returns what workerIemOpMnemonicEx returns.
    """
    # Without operands both names are just the mnemonic.
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = '%s_%s' % (sLower, '_'.join(asOperands),);
        sAsm   = '%s %s' % (sLower, ','.join(asOperands),);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
2990
def checkCodeForMacro(self, sCode):
    """
    Checks a piece of code for relevant macro invocations.

    Returns True if an instruction decoder function definition
    (FNIEMOP_DEF or one of the stub variants) was found and processed,
    False otherwise - also when IEMOP_MNEMONIC* invocations were processed.
    """
    # Nothing to scan unless there is a parenthesis past the first column.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFunction = asArgs[1];
        sDefMacro = asArgs[0];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sDefMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub = sDefMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sDefMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # A stub has no function body to wait for.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, ..., a_Op<n>,]
    #                     a_fDisHints, a_fIemHints)
    # The position in the tuple equals the operand count <n>.
    #
    for cOperands, sMnemonicMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                                'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX',)):
        asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
        if asArgs is not None:
            # Five fixed arguments after the name, then the operands, then the two hint arguments.
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[6 + cOperands], asArgs[7 + cOperands], asArgs[6 : 6 + cOperands]);

    #
    # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, ..., a_Op<n>,] a_fDisHints, a_fIemHints)
    #
    for cOperands, sMnemonicMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                                'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4',)):
        asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
        if asArgs is not None:
            # Three fixed arguments after the name, then the operands, then the two hint arguments.
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[4 + cOperands], asArgs[5 + cOperands], asArgs[4 : 4 + cOperands]);

    return False;
3084
3085
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, switching between code state
    (self.kiCode) and multi-line comment state (self.kiCommentMulti).
    Completed comments are passed to parseComment, code outside comments to
    checkCodeForMacro, and a line starting with '}' while in code state
    completes the current instructions.

    Returns the result of self.printErrors (documented as the number of
    errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine is the 1-based number of sLine.

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Scan the line piece by piece unless we are in code state and
            # the first slash starts a C++ line comment ('//').
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: parse it and continue in code state.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                # Only the code in front of the '//' is of interest.
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Complete whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3151
3152
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns what SimpleParser.parse returns (the caller uses it as an error
    count).  Raises Exception if the file cannot be opened or read, and
    passes on whatever the parser raises; a ParserException is printed
    before being passed on.
    """
    #
    # Slurp sSrcFile into a list of lines.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3183
3184
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each entry in an instruction's asCopyTests is looked up first in
    g_dAllInstructionsByStat and then in g_dAllInstructionsByFunction, and
    the tests of every instruction found are appended to the destination
    instruction's aoTests.  References that cannot be resolved or that
    resolve to the destination itself are reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name identifies one instruction, a function name a list of them.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3212
3213
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0, so the result can be added to an error count.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        # Instructions without tests are left untouched.
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3225
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files found next to this script,
    then performs the test copying and the only-test filtering.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted.  Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default instruction map, file name) pairs, parsed in this order.
    atInputs = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );

    cErrors = 0;
    for sDefaultMap, sName in atInputs:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors == 0:
        return True;
    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1);
    return True;
3252
3253
3254
# Parse all the instruction files as soon as this module is loaded or run;
# __parseAll exits the process if any errors were counted.
__parseAll();
3256
3257
3258#
3259# Generators (may perhaps move later).
3260#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are sorted by encoding and
    lead opcodes, and a 'const DISOPCODE' table with one OP()/OPVEX() line
    per instruction is written to oDstFile (standard output by default).

    Only the first map in sort order is written for now, see the break at
    the bottom of the loop.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # The offset each column of a table line is padded to, see the formatting loop below.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start every group of 16 entries with a comment, separated by a blank line.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Table slots holding a list of instructions only get a placeholder for now.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # More than three operands requires the OPVEX macro, which takes four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    # Entry [2] of the operand type is the text for the format
                    # string; it is upper-cased unless it starts with '%'.
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    # The encoding names another instruction map; use that map's sDisParse value.
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                # (Operands already covered by a decoder column above are skipped.)
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to cMaxOperands entries.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the parameter columns up to cMaxOperands entries.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints that map to a DISOPTYPE_ define are included, or'ed
                # together; '0' is used when there are none.  This column also
                # closes the macro invocation.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded to its offset in aoffColumns, or
                # separated by a single space if the line is already longer.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # Example of a generated line and the C macro it is meant for:
                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3424
# When run as a script, write the disassembler tables to standard output.
if __name__ == '__main__':
    generateDisassemblerTables();
3427
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette