VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66909

Last change on this file since 66909 was 66909, checked in by vboxsync, 8 years ago

IEM: Implemented vmovups Wps,Vps (VEX.0F 11). Needs more work on the test side.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 143.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66909 2017-05-16 13:29:44Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66909 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: there is no separate 'long' type there, so make the
# name an alias for int and let the long(...) calls further down keep working.
if sys.version_info >= (3,):
    long = int  # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS constant names and their numeric values.
## The trailing comments give the bit expression each value corresponds to.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        0x00000001,  # RT_BIT_32(0)
    'X86_EFL_1':         0x00000002,  # RT_BIT_32(1)
    'X86_EFL_PF':        0x00000004,  # RT_BIT_32(2)
    'X86_EFL_AF':        0x00000010,  # RT_BIT_32(4)
    'X86_EFL_ZF':        0x00000040,  # RT_BIT_32(6)
    'X86_EFL_SF':        0x00000080,  # RT_BIT_32(7)
    'X86_EFL_TF':        0x00000100,  # RT_BIT_32(8)
    'X86_EFL_IF':        0x00000200,  # RT_BIT_32(9)
    'X86_EFL_DF':        0x00000400,  # RT_BIT_32(10)
    'X86_EFL_OF':        0x00000800,  # RT_BIT_32(11)
    'X86_EFL_IOPL':      0x00003000,  # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':        0x00004000,  # RT_BIT_32(14)
    'X86_EFL_RF':        0x00010000,  # RT_BIT_32(16)
    'X86_EFL_VM':        0x00020000,  # RT_BIT_32(17)
    'X86_EFL_AC':        0x00040000,  # RT_BIT_32(18)
    'X86_EFL_VIF':       0x00080000,  # RT_BIT_32(19)
    'X86_EFL_VIP':       0x00100000,  # RT_BIT_32(20)
    'X86_EFL_ID':        0x00200000,  # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5,  # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  0x00000002,  # RT_BIT_32(1)
}
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps a flag mnemonic to an X86_EFL_XXX constant name.  A leading '!' on the
## constant name marks a mnemonic that stands for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',    ##< Carry Flag.
    'nc':   '!X86_EFL_CF',   ##< No Carry.

    # NOTE(review): classic debugger notation uses PE for PF=1 and PO for PF=0,
    #               i.e. the reverse of the two entries below.  Confirm before
    #               changing anything, existing test data may rely on this.
    'po':   'X86_EFL_PF',    ##< Parity Odd.
    'pe':   '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',    ##< Aux Flag.
    'na':   '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',    ##< ZeRo.
    'nz':   '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',    ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',    ##< Trap flag.

    'ei':   'X86_EFL_IF',    ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',    ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',    ##< OVerflow.
    'nv':   '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',    ##< Nested Task.
    'rf':   'X86_EFL_RF',    ##< Resume Flag.
    'vm':   'X86_EFL_VM',    ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',    ##< Alignment Check.
    'vif':  'X86_EFL_VIF',   ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',   ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
}
124
## Constants and values for CR0 (constant name -> bit value).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001,  # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002,  # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004,  # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008,  # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010,  # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020,  # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000,  # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000,  # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000,  # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000,  # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000,  # RT_BIT_32(31)
}
139
## Constants and values for CR4 (constant name -> bit value).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001,  # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002,  # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004,  # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008,  # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010,  # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020,  # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040,  # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080,  # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100,  # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200,  # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400,  # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000,  # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000,  # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000,  # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000,  # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000,  # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000,  # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000,  # RT_BIT_32(22)
}
161
## XSAVE components (XCR0), constant name -> bit value or bit mask.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4,  # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6,  # For clearing all AVX and SSE bits.
}
178
179
## \@op[1-4] locations.
## Only the keys carry information here; every value is an empty list.
g_kdOpLocations = {
    'reg':    [],  ## modrm.reg
    'rm':     [],  ## modrm.rm
    'imm':    [],  ## immediate instruction data
    'vvvv':   [],  ## VEX.vvvv

    # Fixed registers.
    'AL':     [],
    'rAX':    [],
    'rSI':    [],
    'rDI':    [],
    'rFLAGS': [],
    'CS':     [],
    'DS':     [],
    'ES':     [],
    'FS':     [],
    'GS':     [],
    'SS':     [],
}
200
## \@op[1-4] types
##
## Each value is a 4-tuple:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (a g_kdOpLocations key).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':         ('IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap'),

    # ModR/M.rm
    'Eb':         ('IDX_UseModRM', 'rm', '%Eb', 'Eb'),
    'Ew':         ('IDX_UseModRM', 'rm', '%Ew', 'Ew'),
    'Ev':         ('IDX_UseModRM', 'rm', '%Ev', 'Ev'),
    'Wss':        ('IDX_UseModRM', 'rm', '%Wss', 'Wss'),
    'Wss_WO':     ('IDX_UseModRM', 'rm', '%Wss', 'Wss'),
    'Wsd':        ('IDX_UseModRM', 'rm', '%Wsd', 'Wsd'),
    'Wsd_WO':     ('IDX_UseModRM', 'rm', '%Wsd', 'Wsd'),
    'Wps':        ('IDX_UseModRM', 'rm', '%Wps', 'Wps'),
    'Wps_WO':     ('IDX_UseModRM', 'rm', '%Wps', 'Wps'),
    'Wpd':        ('IDX_UseModRM', 'rm', '%Wpd', 'Wpd'),
    'Wpd_WO':     ('IDX_UseModRM', 'rm', '%Wpd', 'Wpd'),
    'Wdq':        ('IDX_UseModRM', 'rm', '%Wdq', 'Wdq'),
    'Wdq_WO':     ('IDX_UseModRM', 'rm', '%Wdq', 'Wdq'),
    'Wq':         ('IDX_UseModRM', 'rm', '%Wq', 'Wq'),
    'WqZxReg_WO': ('IDX_UseModRM', 'rm', '%Wq', 'Wq'),

    # ModR/M.rm - register only.
    'Uq':         ('IDX_UseModRM', 'rm', '%Uq', 'Uq'),
    'UqHi':       ('IDX_UseModRM', 'rm', '%Uq', 'UqHi'),
    'Uss':        ('IDX_UseModRM', 'rm', '%Uss', 'Uss'),
    'Usd':        ('IDX_UseModRM', 'rm', '%Usd', 'Usd'),
    'Nq':         ('IDX_UseModRM', 'rm', '%Qq', 'Nq'),

    # ModR/M.rm - memory only.
    'Ma':         ('IDX_UseModRM', 'rm', '%Ma', 'Ma'),  ##< Only used by BOUND.
    'Mb_RO':      ('IDX_UseModRM', 'rm', '%Mb', 'Mb'),
    'Md':         ('IDX_UseModRM', 'rm', '%Md', 'Md'),
    'Md_RO':      ('IDX_UseModRM', 'rm', '%Md', 'Md'),
    'Md_WO':      ('IDX_UseModRM', 'rm', '%Md', 'Md'),
    'Mq':         ('IDX_UseModRM', 'rm', '%Mq', 'Mq'),
    'Mq_WO':      ('IDX_UseModRM', 'rm', '%Mq', 'Mq'),
    'M_RO':       ('IDX_UseModRM', 'rm', '%M', 'M'),
    'M_RW':       ('IDX_UseModRM', 'rm', '%M', 'M'),

    # ModR/M.reg
    'Gb':         ('IDX_UseModRM', 'reg', '%Gb', 'Gb'),
    'Gw':         ('IDX_UseModRM', 'reg', '%Gw', 'Gw'),
    'Gv':         ('IDX_UseModRM', 'reg', '%Gv', 'Gv'),
    'Gv_RO':      ('IDX_UseModRM', 'reg', '%Gv', 'Gv'),
    'Pq_WO':      ('IDX_UseModRM', 'reg', '%Pq', 'Pq'),
    'Vss':        ('IDX_UseModRM', 'reg', '%Vss', 'Vss'),
    'Vss_WO':     ('IDX_UseModRM', 'reg', '%Vss', 'Vss'),
    'VssZx_WO':   ('IDX_UseModRM', 'reg', '%Vss', 'Vss'),
    'Vsd':        ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd'),
    'Vsd_WO':     ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd'),
    'VsdZx_WO':   ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd'),
    'Vps':        ('IDX_UseModRM', 'reg', '%Vps', 'Vps'),
    'Vps_WO':     ('IDX_UseModRM', 'reg', '%Vps', 'Vps'),
    'Vpd':        ('IDX_UseModRM', 'reg', '%Vpd', 'Vpd'),
    'Vpd_WO':     ('IDX_UseModRM', 'reg', '%Vpd', 'Vpd'),
    'Vq':         ('IDX_UseModRM', 'reg', '%Vq', 'Vq'),
    'Vq_WO':      ('IDX_UseModRM', 'reg', '%Vq', 'Vq'),
    'Vdq_WO':     ('IDX_UseModRM', 'reg', '%Vdq', 'Vdq'),
    'VqHi':       ('IDX_UseModRM', 'reg', '%Vdq', 'VdqHi'),
    'VqHi_WO':    ('IDX_UseModRM', 'reg', '%Vdq', 'VdqHi'),
    'VqZx_WO':    ('IDX_UseModRM', 'reg', '%Vq', 'VqZx'),

    # VEX.vvvv
    'HdqCss':     ('IDX_UseModRM', 'vvvv', '%Hx', 'HdqCss'),
    'HdqCsd':     ('IDX_UseModRM', 'vvvv', '%Hx', 'HdqCsd'),

    # Immediate values.
    'Ib':         ('IDX_ParseImmByte', 'imm', '%Ib', 'Ib'),  ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':         ('IDX_ParseImmUshort', 'imm', '%Iw', 'Iw'),
    'Id':         ('IDX_ParseImmUlong', 'imm', '%Id', 'Id'),
    'Iq':         ('IDX_ParseImmQword', 'imm', '%Iq', 'Iq'),
    'Iv':         ('IDX_ParseImmV', 'imm', '%Iv', 'Iv'),  ##< o16: word, o32: dword, o64: qword
    'Iz':         ('IDX_ParseImmZ', 'imm', '%Iz', 'Iz'),  ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':         ('IDX_ParseImmAddr', 'imm', '%Ob', 'Ob'),
    'Ov':         ('IDX_ParseImmAddr', 'imm', '%Ov', 'Ov'),

    # Relative jump targets
    'Jb':         ('IDX_ParseImmBRel', 'imm', '%Jb', 'Jb'),
    'Jv':         ('IDX_ParseImmVRel', 'imm', '%Jv', 'Jv'),

    # DS:rSI
    'Xb':         ('IDX_ParseXb', 'rSI', '%eSI', 'Xb'),
    'Xv':         ('IDX_ParseXv', 'rSI', '%eSI', 'Xv'),
    # ES:rDI
    'Yb':         ('IDX_ParseYb', 'rDI', '%eDI', 'Yb'),
    'Yv':         ('IDX_ParseYv', 'rDI', '%eDI', 'Yv'),

    'Fv':         ('IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv'),

    # Fixed registers.
    'AL':         ('IDX_ParseFixedReg', 'AL', 'al', 'REG_AL'),
    'rAX':        ('IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX'),
    'CS':         ('IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS'),  # 8086: push CS
    'DS':         ('IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS'),
    'ES':         ('IDX_ParseFixedReg', 'ES', 'es', 'REG_ES'),
    'FS':         ('IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS'),
    'GS':         ('IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS'),
    'SS':         ('IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS'),
}
311
312# IDX_ParseFixedReg
313# IDX_ParseVexDest
314
315
## IEMFORM_XXX mappings.
## Each value is ( sEncoding, [ sWhere1, ... ] ); the list is None for 'FIXED'.
g_kdIemForms = {
    'RM':         ('ModR/M', ['reg', 'rm']),
    'RM_REG':     ('ModR/M', ['reg', 'rm']),
    'RM_MEM':     ('ModR/M', ['reg', 'rm']),
    'MR':         ('ModR/M', ['rm', 'reg']),
    'MR_REG':     ('ModR/M', ['rm', 'reg']),
    'MR_MEM':     ('ModR/M', ['rm', 'reg']),
    'M':          ('ModR/M', ['rm']),
    'M_REG':      ('ModR/M', ['rm']),
    'M_MEM':      ('ModR/M', ['rm']),
    'R':          ('ModR/M', ['reg']),

    'VEX_RM':     ('VEX.ModR/M', ['reg', 'rm']),
    'VEX_RM_REG': ('VEX.ModR/M', ['reg', 'rm']),
    'VEX_RM_MEM': ('VEX.ModR/M', ['reg', 'rm']),
    'VEX_XM':     ('VEX.ModR/M', ['reg', 'rm']),  # same as VEX_RM_MEM
    'VEX_MR':     ('VEX.ModR/M', ['rm', 'reg']),
    'VEX_MR_REG': ('VEX.ModR/M', ['rm', 'reg']),
    'VEX_MR_MEM': ('VEX.ModR/M', ['rm', 'reg']),
    'VEX_M':      ('VEX.ModR/M', ['rm']),
    'VEX_M_REG':  ('VEX.ModR/M', ['rm']),
    'VEX_M_MEM':  ('VEX.ModR/M', ['rm']),
    'VEX_R':      ('VEX.ModR/M', ['reg']),
    'VEX_RVM':    ('VEX.ModR/M', ['reg', 'vvvv', 'rm']),
    'VEX_MVR':    ('VEX.ModR/M', ['rm', 'vvvv', 'reg']),

    'FIXED':      ('fixed', None),
}
345
## \@oppfx values.  Only the keys matter; the values are empty lists.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
}
353
## Special \@opcode tag values.  Only the keys matter; the values are empty lists.
g_kdSpecialOpcodes = {
    '/reg':       [],
    'mr/reg':     [],
    '11 /reg':    [],
    '!11 /reg':   [],
    '11 mr/reg':  [],
    '!11 mr/reg': [],
}
363
## Special \@opcodesub tag values.
## Each value is a one-element list; the '11' and '!11' keys are aliases that
## carry the same element as their long form.
g_kdSubOpcodes = {
    'none':       [None],
    '11 mr/reg':  ['11 mr/reg'],
    '11':         ['11 mr/reg'],   ##< alias
    '!11 mr/reg': ['!11 mr/reg'],
    '!11':        ['!11 mr/reg'],  ##< alias
}
372
## Valid values for \@openc.
## Each value is a one-element list holding a BS3CG1ENC_XXX name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M':     ['BS3CG1ENC_MODRM'],      ##< ModR/M
    'VEX.ModR/M': ['BS3CG1ENC_VEX_MODRM'],  ##< VEX...ModR/M
    'fixed':      ['BS3CG1ENC_FIXED'],      ##< Fixed encoding (address, registers, etc).
    'prefix':     [None],                   ##< Prefix
}
380
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys matter; the values are empty lists.
g_kdInvalidStyles = {
    'immediate':               [],  ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':             [],  ##< Intel decodes ModR/M.
    'intel-modrm-imm8':        [],  ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':      [],  ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [],  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
}
389
## Known CPU names.  Only the keys matter; every value is an empty tuple.
g_kdCpuNames = {
    '8086':  (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
}
397
## \@opcpuid
## Maps the lower-case feature name to an X86_CPUID_XXX constant name.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the constants for 'pclmul', 'monitor' and 'smx' name other
    #               features (DTES64, CPLDS, TM2); they look shifted by one row
    #               from the header they were copied from - confirm.
    'pclmul':     'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor':    'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':      'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':      'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
}
442
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name; the last two hints map
## to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',          ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',       ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',           ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',             ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',                ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',     ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',      ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',     ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',           ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86',  ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                 ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                    ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                    ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                    ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                 ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                 ##< Lock prefix allowed.
}
477
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys matter; the values are empty lists.
g_kdXcptTypes = {
    'none':  [],
    '1':     [],
    '2':     [],
    '3':     [],
    '4':     [],
    '4UA':   [],
    '5':     [],
    '6':     [],
    '7':     [],
    '8':     [],
    '11':    [],
    '12':    [],
    'E1':    [],
    'E1NF':  [],
    'E2':    [],
    'E3':    [],
    'E3NF':  [],
    'E4':    [],
    'E4NF':  [],
    'E5':    [],
    'E5NF':  [],
    'E6':    [],
    'E6NF':  [],
    'E7NF':  [],
    'E9':    [],
    'E9NF':  [],
    'E10':   [],
    'E11':   [],
    'E12':   [],
    'E12NF': [],
}
511
512
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hexadecimal digits (e.g. '0x0f').

    Returns True if it is, False if not.
    """
    sHexDigits = '0123456789abcdef'
    return len(sOpcode) == 4 \
       and sOpcode[:2] == '0x' \
       and sOpcode[2] in sHexDigits \
       and sOpcode[3] in sHexDigits
524
525
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys matter.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [],  ##< VEX or EVEX prefix with map select (mmmmm) = 1
        'vex2':     [],  ##< VEX or EVEX prefix with map select (mmmmm) = 2
        'vex3':     [],  ##< VEX or EVEX prefix with map select (mmmmm) = 3
        'xop8':     [],  ##< XOP prefix with map select (mmmmm) = 8
        'xop9':     [],  ##< XOP prefix with map select (mmmmm) = 9
        'xop10':    [],  ##< XOP prefix with map select (mmmmm) = 10
    }
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':      [256, ],  ##< next opcode byte selects the instruction (default).
        '/r':        [  8, ],  ##< modrm.reg selects the instruction.
        'memreg /r': [ 16, ],  ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':    [ 32, ],  ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':    [  8, ],  ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':     [  8, ],  ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':        [ 64, ],  ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    }

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the list of lead opcode bytes as
        '0xNN' strings (None means no lead bytes), sSelector a kdSelectors key,
        sEncoding a kdEncodings key and sDisParse either None or a string
        starting with 'IDX_Parse'.  Invalid arguments trip an assertion.
        """
        assert sSelector in self.kdSelectors
        assert sEncoding in self.kdEncodings
        if asLeadOpcodes is None:
            asLeadOpcodes = []
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode)
        assert sDisParse is None or sDisParse.startswith('IDX_Parse')

        self.sName          = sName
        self.asLeadOpcodes  = asLeadOpcodes     ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector         ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding         ##< The encoding, see kdEncodings.
        self.aoInstructions = []                ##< The instructions in this map (Instruction objects).
        self.sDisParse      = sDisParse         ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0]

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the selectors that require a particular
        modrm.mod value, the two top bits of the byte are asserted on.
        """
        bOpcode = oInstr.getOpcodeByte()

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr)
            return bOpcode

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3)

        # modrm.mod lives in bits 7:6, so "mod == 0y11" means the masked value
        # equals 0xc0.  (Comparing against 0xc could never match.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr)
            return (bOpcode >> 3) & 0x7

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr)
            return (bOpcode >> 3) & 0x7

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr)
            return bOpcode & 0x3f

        assert False, self.sSelector
        return -1

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize()
        aoTable = [None] * cTable

        # Insert the instructions, skipping those without an opcode.
        for oInstr in self.aoInstructions:
            if not oInstr.sOpcode:
                continue
            idxOpcode = self.getInstructionIndex(oInstr)
            assert idxOpcode < cTable, str(idxOpcode)

            oExisting = aoTable[idxOpcode]
            if oExisting is None:
                aoTable[idxOpcode] = oInstr
            elif isinstance(oExisting, list):
                oExisting.append(oInstr)
            else:
                # Second instruction for this slot: switch the entry to a list.
                aoTable[idxOpcode] = [oExisting, oInstr]

        return aoTable

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' / 'r' words and two digit lower case hex words are appended with a
        leading underscore, anything else is appended capitalized.
        """
        sName = 'g_aDisas'
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m'
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r'
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord
            else:
                sWord  = sWord.replace('grp', 'Grp')
                sWord  = sWord.replace('map', 'Map')
                # Slicing instead of indexing so an empty word (e.g. from a
                # doubled underscore) adds nothing rather than raising.
                sName += sWord[:1].upper() + sWord[1:]
        return sName
666
667
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter gives the default stance on zero vs sign extending.  A value
    prefixed with '+' or '-' is always treated as sign extended, regardless of
    fUnsigned.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32]     # Normal sizes.
        self.sName     = sName
        self.acbSizes  = acbSizes
        self.fUnsigned = fUnsigned

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage)
            self.sMessage = sMessage

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'))

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation only produces the single entry form; the bytearray
        holds the value with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value')

        # An explicit sign forces sign extension and shifts where the '0x'
        # prefix is expected.
        fExplicitSign = sValue[0] in ('-', '+')
        fSignExtend   = fExplicitSign or not self.fUnsigned
        offPrefix     = 1 if fExplicitSign else 0
        fHex          = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x'

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10)
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt))

        # Produce the hex digits.  Negative values are converted manually to
        # two's complement by inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,)
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)])
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex

        # Find the natural size: the first entry in acbSizes that fits, or
        # else the smallest multiple of the largest size that does.
        cDigits    = len(sHex)
        cMaxDigits = self.acbSizes[-1] * 2
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2)
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits

        # Pad to the natural size, extending with the sign of the value.
        sHex = sHex.rjust(cNaturalDigits, '0' if iValue >= 0 else 'f')

        # Convert the digit pairs to a bytearray, last pair first.
        abValue = bytearray([int(sHex[off : off + 2], 16) for off in range(len(sHex) - 2, -1, -2)])

        return ((fSignExtend, abValue),)

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue)
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage
        return True

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base type.
        """
        _ = sValue
        return False
792
793
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of the mnemonics in g_kdEFlagsMnemonics.
    """

    ## The mnemonics that stand for a flag being clear.  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)

    def get(self, sValue):
        """
        Converts a comma separated flag mnemonic list to byte form.

        Returns a (clear, set) pair of (fSignExtend, bytearray) entries when
        at least one mnemonic denotes a cleared flag, where the first entry
        holds the mask of flags to clear and the second the flags to set.
        Otherwise returns the single entry tuple with the flags to set.

        Raises BadValue on an unknown mnemonic.
        """
        fClear = 0
        fSet   = 0
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None)
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]]
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant]

        aoSet = TestType.get(self, '0x%x' % (fSet,))
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True
            return (aoClear[0], aoSet[0])
        assert self.isAndOrPair(sValue) is False
        return aoSet

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any flag-is-clear mnemonic, in which case
        get() returns an AND+OR pair.

        The mnemonics are compared whole, split on commas the same way get()
        does it.  A plain substring search would mistake e.g. 'iopl' for
        containing 'pl'.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','))
829
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (constant name -> value), such as the CR0 one.

    Values are comma separated lists of bit names without the constant prefix;
    the names are upper-cased before lookup.
    """

    ## Not referenced by this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)
        self.kdConstantsAndValues = kdConstantsAndValues    ##< Constant name -> value.
        self.sConstantPrefix      = sConstantPrefix         ##< Prepended to each upper-cased name.

    def get(self, sValue):
        """
        ORs together the values of the named bits and returns the result in
        the byte form produced by TestType.get.

        Raises BadValue on an unknown name.
        """
        fCombined = 0
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper()
            fBits     = self.kdConstantsAndValues.get(sConstant, None)
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits
        return TestType.get(self, '0x%x' % (fCombined,))
850
851
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought of as values to modify BS3REGCTX and extended
    (needs to be structured) state.

    An instance names a field (key of kdFields), an assignment operator (one
    of kasOperators), the value as a string and the type name of that value.
    """
    ## Assignment operators.
    # NOTE(review): multi-character operators are listed before their shorter
    # substrings, presumably so a scanner can match the longest one first -
    # confirm against the code that consumes this list.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types: type name -> object implementing the value conversion.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0',  g_kdX86Cr0Constants,    'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4',  g_kdX86Cr4Constants,    'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),  # Was misspelled 'xmm15_dw0'.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Stores the modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be strings (checked by assertions only).
        """
        # Check the types first so that an unhashable argument trips an
        # assertion rather than a TypeError in the dictionary lookup.
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
1117
1118
class TestSelector(object):
    """
    A single selector (variable, compare operator, value) for an
    instruction test.
    """
    ## The compare operators a selector may use.
    kasCompareOps = [ '==', '!=' ];

    ## The selector variables.  Each one maps its valid values to a name
    ## string for that variable/value combination.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Shorthand predicates; each one stands for the variable expression
    ## given as its value.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the selector after asserting that the variable, the operator
        and the value for that variable are all known.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1207
1208
1209class InstructionTest(object):
1210 """
1211 Instruction test.
1212 """
1213
1214 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1215 self.oInstr = oInstr; # type: InstructionTest
1216 self.aoInputs = []; # type: list(TestInOut)
1217 self.aoOutputs = []; # type: list(TestInOut)
1218 self.aoSelectors = []; # type: list(TestSelector)
1219
1220 def toString(self, fRepr = False):
1221 """
1222 Converts it to string representation.
1223 """
1224 asWords = [];
1225 if self.aoSelectors:
1226 for oSelector in self.aoSelectors:
1227 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1228 asWords.append('/');
1229
1230 for oModifier in self.aoInputs:
1231 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1232
1233 asWords.append('->');
1234
1235 for oModifier in self.aoOutputs:
1236 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1237
1238 if fRepr:
1239 return '<' + ' '.join(asWords) + '>';
1240 return ' '.join(asWords);
1241
1242 def __str__(self):
1243 """ Provide string represenation. """
1244 return self.toString(False);
1245
1246 def __repr__(self):
1247 """ Provide unambigious string representation. """
1248 return self.toString(True);
1249
class Operand(object):
    """
    One operand of an instruction, described by where it is located and
    by its type.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known keys of the respective tables.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided by the first character of the type alone.
        chLead = self.sType[0];
        return chLead == 'E' or chLead == 'G' or chLead == 'M';
1264
1265
1266
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds what has been collected about one instruction: the core
    attributes (mnemonic, operands, opcode, flags, ...), implementation
    info, source location and some raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile and iLine are stored as the source file and the line the
        instruction was created on.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' pairs separated by '; ', leaving out fields
        whose value is None.  The result is wrapped in angle brackets when
        fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Only fields with a value are shown.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Understands the full hex byte form ('0x..'), the '/r' form, the
        '11/r' form and the '!11/r' form, where r is a single digit that
        ends up in bits 5:3 of the result.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (two characters: the slash and one digit):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form (mod=3, i.e. both top bits set):
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1, i.e. 0x40 in the two top bits):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x40;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        and that via g_kdX86EFlagsConstants into its value.  Returns 0 when
        asFlags is None or empty.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1440
1441
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps, indexed by map name.
# NOTE(review): 'grp1_80' is the only grp1 map without sSelector = '/r';
# left as found - confirm whether that is intentional.
g_dInstructionMaps = {
    # One byte opcode map and its groups.
    'one':        InstructionMap('one'),
    'grp1_80':    InstructionMap('grp1_80',    asLeadOpcodes = ['0x80']),
    'grp1_81':    InstructionMap('grp1_81',    asLeadOpcodes = ['0x81'], sSelector = '/r'),
    'grp1_82':    InstructionMap('grp1_82',    asLeadOpcodes = ['0x82'], sSelector = '/r'),
    'grp1_83':    InstructionMap('grp1_83',    asLeadOpcodes = ['0x83'], sSelector = '/r'),
    'grp1a':      InstructionMap('grp1a',      asLeadOpcodes = ['0x8f'], sSelector = '/r'),
    'grp2_c0':    InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0'], sSelector = '/r'),
    'grp2_c1':    InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1'], sSelector = '/r'),
    'grp2_d0':    InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0'], sSelector = '/r'),
    'grp2_d1':    InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1'], sSelector = '/r'),
    'grp2_d2':    InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2'], sSelector = '/r'),
    'grp2_d3':    InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3'], sSelector = '/r'),
    'grp3_f6':    InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6'], sSelector = '/r'),
    'grp3_f7':    InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7'], sSelector = '/r'),
    'grp4':       InstructionMap('grp4',       asLeadOpcodes = ['0xfe'], sSelector = '/r'),
    'grp5':       InstructionMap('grp5',       asLeadOpcodes = ['0xff'], sSelector = '/r'),
    'grp11_c6_m': InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6'], sSelector = '!11 /r'),
    'grp11_c6_r': InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6'], sSelector = '11'),    # xabort
    'grp11_c7_m': InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7'], sSelector = '!11 /r'),
    'grp11_c7_r': InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7'], sSelector = '11'),    # xbegin

    # Two byte (0x0f) opcode map and its groups.
    'two0f':      InstructionMap('two0f',      asLeadOpcodes = ['0x0f'], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':       InstructionMap('grp6',       asLeadOpcodes = ['0x0f', '0x00'], sSelector = '/r'),
    'grp7_m':     InstructionMap('grp7_m',     asLeadOpcodes = ['0x0f', '0x01'], sSelector = '!11 /r'),
    'grp7_r':     InstructionMap('grp7_r',     asLeadOpcodes = ['0x0f', '0x01'], sSelector = '11'),
    'grp8':       InstructionMap('grp8',       asLeadOpcodes = ['0x0f', '0xba'], sSelector = '/r'),
    'grp9':       InstructionMap('grp9',       asLeadOpcodes = ['0x0f', '0xc7'], sSelector = 'mod /r'),
    'grp10':      InstructionMap('grp10',      asLeadOpcodes = ['0x0f', '0xb9'], sSelector = '/r'), # UD1 /w modr/m
    'grp12':      InstructionMap('grp12',      asLeadOpcodes = ['0x0f', '0x71'], sSelector = 'mod /r'),
    'grp13':      InstructionMap('grp13',      asLeadOpcodes = ['0x0f', '0x72'], sSelector = 'mod /r'),
    'grp14':      InstructionMap('grp14',      asLeadOpcodes = ['0x0f', '0x73'], sSelector = 'mod /r'),
    'grp15':      InstructionMap('grp15',      asLeadOpcodes = ['0x0f', '0xae'], sSelector = 'memreg /r'),
    'grp16':      InstructionMap('grp16',      asLeadOpcodes = ['0x0f', '0x18'], sSelector = 'mod /r'),
    'grpA17':     InstructionMap('grpA17',     asLeadOpcodes = ['0x0f', '0x78'], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':       InstructionMap('grpP',       asLeadOpcodes = ['0x0f', '0x0d'], sSelector = '/r'), # AMD: prefetch

    # Three byte opcode maps.
    'three0f38':  InstructionMap('three0f38',  asLeadOpcodes = ['0x0f', '0x38']),
    'three0f3a':  InstructionMap('three0f3a',  asLeadOpcodes = ['0x0f', '0x3a']),

    # VEX maps and groups.
    'vexmap1':    InstructionMap('vexmap1',    sEncoding = 'vex1'),
    'vexgrp12':   InstructionMap('vexgrp12',   sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    'vexgrp13':   InstructionMap('vexgrp13',   sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    'vexgrp14':   InstructionMap('vexgrp14',   sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    'vexgrp15':   InstructionMap('vexgrp15',   sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    'vexgrp17':   InstructionMap('vexgrp17',   sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    'vexmap2':    InstructionMap('vexmap2',    sEncoding = 'vex2'),
    'vexmap3':    InstructionMap('vexmap3',    sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    '3dnow':      InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f']),
    'xopmap8':    InstructionMap('xopmap8',    sEncoding = 'xop8'),
    'xopmap9':    InstructionMap('xopmap9',    sEncoding = 'xop9'),
    'xopgrp1':    InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':    InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':    InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':   InstructionMap('xopmap10',   sEncoding = 'xop10'),
    'xopgrp4':    InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1514
1515
1516
class ParserException(Exception):
    """ Exception raised by the parser; carries just the message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1521
1522
1523class SimpleParser(object):
1524 """
1525 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1526 """
1527
1528 ## @name Parser state.
1529 ## @{
1530 kiCode = 0;
1531 kiCommentMulti = 1;
1532 ## @}
1533
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to
    parse and sDefaultMap the name of the g_dInstructionMaps entry to use
    as default map (asserted to exist).
    """
    # Input and parser position/state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation expressions; the first three are plain identifiers.
    sReIdentifier = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name -> handler method.  Tags sharing a handler are grouped.
    self.dTagHandlers   = {};
    for asTags, fnHandler in (
        ( ('@opbrief',),                                    self.parseTagOpBrief ),
        ( ('@opdesc',),                                     self.parseTagOpDesc ),
        ( ('@opmnemonic',),                                 self.parseTagOpMnemonic ),
        ( ('@op1', '@op2', '@op3', '@op4',),                self.parseTagOpOperandN ),
        ( ('@oppfx',),                                      self.parseTagOpPfx ),
        ( ('@opmaps',),                                     self.parseTagOpMaps ),
        ( ('@opcode',),                                     self.parseTagOpcode ),
        ( ('@opcodesub',),                                  self.parseTagOpcodeSub ),
        ( ('@openc',),                                      self.parseTagOpEnc ),
        ( ('@opfltest', '@opflmodify', '@opflundef',
           '@opflset', '@opflclear',),                      self.parseTagOpEFlags ),
        ( ('@ophints',),                                    self.parseTagOpHints ),
        ( ('@opdisenum',),                                  self.parseTagOpDisEnum ),
        ( ('@opmincpu',),                                   self.parseTagOpMinCpu ),
        ( ('@opcpuid',),                                    self.parseTagOpCpuId ),
        ( ('@opgroup',),                                    self.parseTagOpGroup ),
        ( ('@opunused', '@opinvalid', '@opinvlstyle',),     self.parseTagOpUnusedInvalid ),
        ( ('@optest',),                                     self.parseTagOpTest ),
        ( ('@optestign', '@optestignore',),                 self.parseTagOpTestIgnore ),
        ( ('@opcopytests',),                                self.parseTagOpCopyTests ),
        ( ('@oponly', '@oponlytest',),                      self.parseTagOpOnlyTest ),
        ( ('@opxcpttype',),                                 self.parseTagOpXcptType ),
        ( ('@opstats',),                                    self.parseTagOpStats ),
        ( ('@opfunction',),                                 self.parseTagOpFunction ),
        ( ('@opdone',),                                     self.parseTagOpDone ),
    ):
        for sTag in asTags:
            self.dTagHandlers[sTag] = fnHandler;

    # The numbered test tags: @optest0..47 and @optest[0]..[47].
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]',):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    self.asErrors = [];
1600
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file
    name and the current line number (self.iLine).
    """
    tArgs = (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException("%s:%d: error: %s" % tArgs);
1606
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the line number reported is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1612
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1620
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error in self.asErrors for a line inside the current
    comment, i.e. line self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1628
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1637
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1644
1645
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global list of all
    instructions and the list of current instructions.

    The instruction is created for line iLine, or for the current line
    (self.iLine) when iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    for aoList in (g_aoAllInstructions, self.aoCurInstrs,):
        aoList.append(oNewInstr);
    return oNewInstr;
1654
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    the first '_' separated word, lower-cased, becomes the mnemonic and,
    unless the instruction already has operands, the remaining words are
    taken to be operand types.

    Returns False if an operand type is not in g_kdOpTypes, True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Give up quietly; reporting this via self.error() is disabled.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1670
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks the instruction as completed on line iLine, fills in defaults
    for attributes that are still unset (mnemonic, disassembler enum,
    stats name, function name, encoding, maps) and registers the
    instruction with its maps and the global stats/function dictionaries.

    Always returns True; a duplicate stats name is recorded via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    if oInstr.cOpTags > 0:
        # Specified instructions: no extra processing (placeholder).
        if oInstr.sStats is None:
            pass;
    else:
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        pass;

    #
    # Common defaults.
    #

    # Without a mnemonic, try guessing it (and the operands) from the
    # stats name or, failing that, from the function name.
    if oInstr.sMnemonic is None:
        sGuessFrom = None;
        if oInstr.sStats is not None:
            sGuessFrom = oInstr.sStats;
        elif oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # The disassembler op enum constant defaults to the uppercased mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The '_<type>' suffixes used when deriving names from the mnemonic.
    sOperandSuffix = ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The IEM statistics base name: from the function name if there is
    # one, else from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + sOperandSuffix;

    # The IEM function name: from mnemonic and operand types if there is
    # a mnemonic, else from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + sOperandSuffix;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # The encoding is derived from the operands.  It stays unset when there
    # are operands but the first one does not use ModR/M.
    if oInstr.sEncoding is None:
        aoOperands = oInstr.aoOperands;
        if not aoOperands:
            oInstr.sEncoding = 'ModR/M' if (oInstr.fUnused and oInstr.sSubOpcode) else 'fixed';
        elif aoOperands[0].usesModRM():
            fVexOperand = len(aoOperands) >= 2 and aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVexOperand else 'ModR/M';

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
1773
def doneInstructions(self, iLineInComment = None):
    """
    Done with current instruction.

    Completes each current instruction via doneInstructionOne(), using the
    current line, or self.iCommentLine + iLineInComment when iLineInComment
    is given.  Updates the stub and instruction totals, then resets the
    comment text and the list of current instructions.  Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oCurInstr in self.aoCurInstrs:
        self.doneInstructionOne(oCurInstr, iLineDone);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for the next instruction(s).
    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1788
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib to oValue on every current instruction.

    Unless fOverwrite is exactly True, an instruction is only updated when
    its current attribute value is None; any other existing value (empty
    strings included) is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        # Short-circuit keeps us from reading the attribute when forcing.
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue)
1800
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib to oValue on every
    current instruction.

    The list is padded with None entries until iEntry is a valid index.
    Unless fOverwrite is exactly True, the entry is only written when it
    currently holds None.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib)
        cMissing  = iEntry + 1 - len(aoEntries)
        if cMissing > 0:
            aoEntries.extend([None] * cMissing)
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue
1812
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    hex byte (0x.. or 0X..), everything after 'Opcode' is recorded as the
    'sRawOldOpcodes' attribute of the current instructions (without
    overwriting an existing value), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False

    asWords = asLines[0].split()
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode specification.
        asOpcodeWords = []
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:]
            asOpcodeWords.append(sWord)
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords))

    return False
1828
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added via addInstruction()
    at line self.iCommentLine + iTagLine.  The tag count of every current
    instruction is bumped, and self.cTotalTagged is incremented for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine)

    for oCurInstr in self.aoCurInstrs:
        cTagsNow = oCurInstr.cOpTags + 1
        oCurInstr.cOpTags = cTagsNow
        if cTagsNow == 1:
            self.cTotalTagged += 1

    return self.aoCurInstrs[-1]
1838
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings.

    Each non-empty section (a list of lines) becomes one string consisting
    of its stripped lines joined by a single space.  Empty sections are
    dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if asLines]
1850
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The stripped lines of each non-empty section are joined using sLineSep
    and the resulting section strings are joined using sSectionSep.  Empty
    sections are skipped entirely, so the result neither starts nor ends
    with a section separator because of them.

    Returns the flattened string, empty if there are no non-empty sections.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip()

    # Join only what was actually emitted; keying the separator on the section
    # index would produce a leading separator when the first section is empty.
    asFlatSections = [sLineSep.join([sLine.strip() for sLine in asLines])
                      for asLines in aasSections
                      if asLines]
    return sSectionSep.join(asFlatSections)
1868
1869
1870
1871 ## @name Tag parsers
1872 ## @{
1873
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, given a trailing full stop if it lacks one, and
    must be a single sentence of at most 180 characters.  It is stored in
    the instruction's sBrief attribute, which must not already be set.

    Returns True on success; otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections)
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if sBrief[-1] != '.':
        sBrief = sBrief + '.'
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief))

    # Find the first dot ending a sentence, i.e. one that is followed by a
    # space or terminates the string.  Dots inside words/numbers are skipped.
    offDot = sBrief.find('.')
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1)
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief))

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,))
    oInstr.sBrief = sBrief

    _ = iEndLine
    return True
1903
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.

    Each non-empty section is flattened into a single string and appended
    to the instruction's asDescSections list.  Always returns True.
    """
    _ = sTag
    _ = iEndLine

    oInstr = self.ensureInstructionForOpTag(iTagLine)
    if aasSections:
        asFlattened = self.flattenSections(aasSections)
        oInstr.asDescSections.extend(asFlattened)
    return True
1918
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The flattened value must match self.oReMnemonic and the instruction
    must not have a mnemonic yet.  Returns True on success; otherwise the
    result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sNewMnemonic = self.flattenAllSections(aasSections)

    # Validate the name first, then refuse to replace an existing one.
    if not self.oReMnemonic.match(sNewMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,)
        return self.errorComment(iTagLine, sError)

    if oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sNewMnemonic,)
        return self.errorComment(iTagLine, sError)

    oInstr.sMnemonic = sNewMnemonic
    return True
1941
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, it is taken from entry [1] of the g_kdOpTypes value for the
    given type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that has already been specified cannot be overwritten.

    Returns True on success; otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    idxOp = int(sTag[-1]) - 1
    assert 0 <= idxOp < 4

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections)
    asSplit = sFlattened.split(':')
    if len(asSplit) == 1:
        sType  = asSplit[0]
        sWhere = None
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,))

    if sType not in g_kdOpTypes:
        # Note: this is about the type, so say so (it used to claim 'where').
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),))
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1]
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),))

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None)
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,))
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType)

    _ = iEndLine
    return True
1991
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The comma separated map names must all be keys of g_dInstructionMaps.
    Whitespace around the names is ignored and empty entries (e.g. from a
    trailing comma at a line break) are dropped.  The maps are appended to
    the instruction's aoMaps list; duplicates are reported as errors.

    Returns True on success; otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries must be filtered out for
    # the 'value required' check to be effective.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')]
    asMaps = [sMap for sMap in asMaps if sMap]
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),))

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName))

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap]
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap)
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap))

    _ = iEndLine
    return True
2026
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None.  A two character
    value gets '0x' prepended before it is validated as an opcode byte.
    Anything other than 'n/a' must be a member of g_kdPrefixes.  A prefix
    that is already set on the instruction cannot be overwritten.

    Returns True on success; otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections)
    asPrefixes = sFlattened.split()
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,))

    sPrefix = asPrefixes[0].lower()
    if sPrefix == 'n/a':
        sPrefix = None
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,))

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,))

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,))
    oInstr.sPrefix = sPrefix

    _ = iEndLine
    return True
2064
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are a valid opcode byte (as per _isValidOpcodeByte), or
    '/N', '11/N' and '!11/N' where N is a single digit in the range 0..7
    (the ModR/M reg field is three bits wide).  An opcode that is already
    set on the instruction cannot be overwritten.

    Returns True on success; otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections)
    if not _isValidOpcodeByte(sOpcode):
        asRegDigits = tuple('01234567')
        fRegForm = any(sOpcode.startswith(sLead) and sOpcode[len(sLead):] in asRegDigits
                       for sLead in ('/', '11/', '!11/'))
        if not fRegForm:
            return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,))

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,))
    oInstr.sOpcode = sOpcode

    _ = iEndLine
    return True
2094
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes; what gets stored in
    the instruction's sSubOpcode attribute is entry [0] of the associated
    value.  An existing sub opcode cannot be overwritten.

    Returns True on success; otherwise the result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sRawValue = self.flattenAllSections(aasSections)
    if sRawValue not in g_kdSubOpcodes:
        sError = '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,)
        return self.errorComment(iTagLine, sError)
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0]

    if oInstr.sSubOpcode is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sSubOpcode, sNewSubOpcode,)
        return self.errorComment(iTagLine, sError)

    oInstr.sSubOpcode = sNewSubOpcode
    return True
2120
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The flattened value is accepted if it is a key of g_kdEncodings, a key
    of g_dInstructionMaps, or passes _isValidOpcodeByte (checked in that
    order).  An existing encoding cannot be overwritten.

    Returns True on success; otherwise the result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sNewEncoding = self.flattenAllSections(aasSections)
    fAcceptable  = sNewEncoding in g_kdEncodings \
                or sNewEncoding in g_dInstructionMaps \
                or _isValidOpcodeByte(sNewEncoding)
    if not fAcceptable:
        sError = '%s: invalid encoding: %s' % (sTag, sNewEncoding,)
        return self.errorComment(iTagLine, sError)

    if oInstr.sEncoding is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sEncoding, sNewEncoding,)
        return self.errorComment(iTagLine, sError)

    oInstr.sEncoding = sNewEncoding
    return True
2147
## EFlags tag to Instruction attribute name.
## Keys are the tag strings handled by parseTagOpEFlags, values are the names
## of the instruction attributes it reads (getattr) and writes (setattr) for
## the respective tag.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
2156
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  Names that only
    match after stripping whitespace are replaced by the stripped form.  All
    invalid names are reported before giving up.

    The resulting list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag; the attribute must still be
    None.

    Returns True on success, False or the result of errorComment() otherwise.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    asNewFlags = sFlattened.split(',')
    if len(asNewFlags) == 1 and asNewFlags[0].lower() == 'none':
        asNewFlags = []
    else:
        fAllValid = True
        for idxFlag, sCurFlag in enumerate(asNewFlags):
            if sCurFlag in g_kdEFlagsMnemonics:
                continue
            sStripped = sCurFlag.strip()
            if sStripped in g_kdEFlagsMnemonics:
                asNewFlags[idxFlag] = sStripped
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sCurFlag,))
        if not fAllValid:
            return False

    # Set them, unless already specified.
    sAttrib    = self.kdOpFlagToAttr[sTag]
    asPrevious = getattr(oInstr, sAttrib)
    if asPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asNewFlags,))
    setattr(oInstr, sAttrib, asNewFlags)
    return True
2188
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    means no hints.  All invalid hints are reported before giving up.  Valid
    hints are added as keys to the instruction's dHints dictionary, with
    duplicates being reported as errors (without failing the tag).

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() without arguments never yields tokens with leading or
    #       trailing whitespace, so there is no need to strip them here.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = []
    else:
        fRc = True
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,))
        if not fRc:
            return False

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,))

    _ = iEndLine
    return True
2222
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    An error is reported unless there is exactly one word, but only a
    missing value stops processing; with several words the first one is
    used.  The word must match self.oReDisEnum and the instruction's
    sDisEnum attribute must not be set already.

    Returns True on success, False or the result of errorComment() otherwise.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split()
    cValues  = len(asValues)
    if cValues != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,))
        if cValues == 0:
            return False

    sNewDisEnum = asValues[0]
    if not self.oReDisEnum.match(sNewDisEnum):
        sError = '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' % (sTag, sNewDisEnum, self.oReDisEnum.pattern)
        return self.errorComment(iTagLine, sError)

    # Set it.
    if oInstr.sDisEnum is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewDisEnum,)
        return self.errorComment(iTagLine, sError)

    oInstr.sDisEnum = sNewDisEnum
    return True
2251
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  An error is reported if more
    than one word is given, in which case the first word is used.  If the
    instruction already has a different minimum CPU, an error is reported
    and the existing value is kept.

    Returns True if a valid CPU name was given; otherwise the result of
    errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split()
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),))

    sMinCpu = asCpus[0]
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),))

    # Set it.  (Must not be assigned during validation above, as that would
    # defeat the overwrite detection.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,))

    _ = iEndLine
    return True
2281
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key of g_kdCpuIdFlags; the single word 'none' (any case) means an empty
    list.  All invalid entries are reported before giving up.  Valid entries
    are appended to the instruction's asCpuIds list, with duplicates being
    reported as errors (without failing the tag).

    Returns True on success, False if any entry is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() without arguments never yields tokens with leading or
    #       trailing whitespace, so there is no need to strip them here.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = []
    else:
        fRc = True
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,))
        if not fRc:
            return False

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId)
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,))

    _ = iEndLine
    return True
2315
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is required, and the
    instruction's sGroup attribute must not be set already.

    Returns True on success; otherwise the result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,))

    (sNewGroup,) = asWords
    if not self.oReGroupName.match(sNewGroup):
        sError = '%s: invalid group name: %s (valid: %s)' % (sTag, sNewGroup, self.oReGroupName.pattern)
        return self.errorComment(iTagLine, sError)

    # Set it.
    if oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,)
        return self.errorComment(iTagLine, sError)

    oInstr.sGroup = sNewGroup
    return True
2341
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one word, which must be a key of g_kdInvalidStyles, is required.
    It is stored in the instruction's sInvalidStyle attribute, which must not
    be set already.  In addition fUnused is set for \@opunused and fInvalid
    for \@opinvalid.

    Returns True on success; otherwise the result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,))

    (sNewStyle,) = asWords
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),))

    # Set it.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,))
    oInstr.sInvalidStyle = sNewStyle

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag)
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True)
    return True
2379
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is one test.  The words of a section are scanned from the
    end: everything after '->' is an output, everything between '/' and '->'
    is an input, and everything before '/' is a selector.  Each of the two
    separators may only occur once and only in that order.  Tests that parse
    without errors are appended to the instruction's aoTests list; problems
    are reported via errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,])
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections))
            continue
        oTest = InstructionTest(oInstr)

        asSelectors = []
        asInputs    = []
        asOutputs   = []
        asCur   = asOutputs
        fRc     = True
        asWords = sFlatSection.split()
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list must be checked
            # by identity: the lists may well have equal content (e.g. both
            # being empty), which must not be mistaken for being the same.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asInputs
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asSelectors
            else:
                asCur.insert(0, sWord)

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond)
            oSelector = None
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp)
                if off >= 0:
                    sVariable = sCondExp[:off]
                    sValue    = sCondExp[off + len(sOp):]
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue)
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),))
                    break
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,))
                oTest.aoSelectors.append(oSelector)
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,))

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ]
            for sItem in asItems:
                oItem = None
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp)
                    if off < 0:
                        continue
                    sField     = sItem[:off]
                    sValueType = sItem[off + len(sOp):]
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1)
                        sValue  = asSplit[0]
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue)
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType)
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ))
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ))
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                   if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),))
                    break
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,))
                    aoDst.append(oItem)
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,))

        #
        # Add the test if it parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest)
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),))
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,))

    _ = iEndLine
    return True
2524
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number in the tag is compared with the number of tests the
    instruction already has and a mismatch is reported as an error.  The
    actual parsing is then left to parseTagOpTest(), whose result is
    returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Extract the digits: '@optest[NN]' or '@optestNN'.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:]
    iTest   = int(sNumber)

    cTests = len(oInstr.aoTests)
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,))
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine)
2540
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or recorded; the method just returns True.

    See also \@oponlytest.
    """
    # All the arguments are deliberately unused.
    del sTag, aasSections, iTagLine, iEndLine
    return True
2552
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the instruction's asCopyTests list.  The list is only
    recorded here; SOURCE comments say the copying is executed after all
    the input has been parsed so forward references can be handled.
    Invalid and duplicate references are reported but do not fail the tag.

    Returns True unless no reference was given, in which case the result of
    errorComment() is returned.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    asReferences = self.flattenAllSections(aasSections).split()
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,))

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,))
        elif self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference)
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern))
    return True
2581
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to the
    g_aoOnlyTestInstructions list unless it is already in it.

    Returns True on success; otherwise the result of errorComment().

    See also \@optestignore.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # No value is allowed here.
    sUnexpected = self.flattenAllSections(aasSections).strip()
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected))

    # Add the instruction to the only-test list, once.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr)
    return True
2604
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word, which must be a key of g_kdXcptTypes, is required.  It
    is stored in the instruction's sXcptType attribute, which must not be
    set already.

    Returns True on success; otherwise the result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,))

    (sNewType,) = asWords
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),))

    # Set it.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sNewType,))
    oInstr.sXcptType = sNewType
    return True
2631
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The flattened value must match self.oReFunctionName and the instruction
    must not have a function name yet.  Returns True on success; otherwise
    the result of errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sNewFunction = self.flattenAllSections(aasSections)

    # Validate the name first, then refuse to replace an existing one.
    if not self.oReFunctionName.match(sNewFunction):
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sNewFunction, self.oReFunctionName.pattern)
        return self.errorComment(iTagLine, sError)

    if oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sNewFunction,)
        return self.errorComment(iTagLine, sError)

    oInstr.sFunction = sNewFunction
    return True
2659
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the instruction.  Normally the
    name is picked up from the IEMOP_MNEMONIC macro invocation, or generated
    from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or is not implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine; # unused, part of the common tag handler signature.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value must look like a statistics name.
    sStats  = self.flattenAllSections(aasSections);
    oReName = self.oReStatsName;
    if not oReName.match(sStats):
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, oReName.pattern);
        return self.errorComment(iTagLine, sError);

    # Only the first explicit name is accepted.
    if oInstr.sStats is None:
        oInstr.sStats = sStats;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sStats,));
2687
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag carries no
    value, otherwise the result of self.errorComment().
    """
    _ = iEndLine; # unused, part of the common tag handler signature.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2700
2701 ## @}
2702
2703
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, old style 'Opcode ...' comments are
    handed to self.parseCommentOldOpcode(), and each @tag together with its
    text sections is dispatched to the matching handler in self.dTagHandlers.

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    of interest), True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Leading empty lines are dropped (advancing self.iCommentLine for each
    # one), as are trailing empty lines.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    cLeadingEmpty = 0;
    while cLeadingEmpty < len(asLines) and not asLines[cLeadingEmpty]:
        cLeadingEmpty += 1;
    self.iCommentLine += cLeadingEmpty;
    del asLines[:cLeadingEmpty];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Untagged text at the top is collected
    # under the '@default' pseudo tag.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # Process the previous tag (dropping a trailing empty section).
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag: the first word is the tag, the remainder (if any)
            # starts its first section.
            #
            asSplit      = sLine.split(None, 1);
            sCurTag      = asSplit[0].lower();
            asCurSection = asSplit[1:];
            aasSections  = [ asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section and opens a new one.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2804
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not on it, following source lines (self.asLines, starting at
    self.iLine) are appended until it is found; self.iLine is not modified.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    A missing open parenthesis or an invalid macro name is reported via
    self.raiseError().  If the file ends before the invocation is complete,
    an error is recorded with self.error() and the arguments completed so
    far are returned, so callers can always unpack the result.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more source lines when running off the end of the text;
        # looping also copes with an appended line being empty.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only top-level commas / the final parenthesis end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2846
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (ignoring whitespace) by an open
    parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Use startswith rather than indexing so a macro name at the very end of
    # the code (nothing following it) is simply treated as 'not found'.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2856
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2862
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    for which findAndParseMacroInvocation finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The inner generator is lazy, so no macro after the first hit is tried.
    oHits = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oHits if asRet is not None), None);
2872
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX,
    IEMOP_MNEMONIC2EX, IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are validated and merged into the last instruction
    in self.aoCurInstrs (one is added if there is none).  Inconsistencies
    with what the @op* tags already specified are reported via self.error()
    and processing continues.

    Returns True.
    """
    _ = sAsm; # currently unused.

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.  Note that the operand list grows while we iterate
    # when the instruction had none, so its length is re-read each time.
    aoOperands = oInstr.aoOperands;
    if len(aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: fall back on the @opN value or a dummy operand.
            if iOperand < len(aoOperands):
                sWhere = aoOperands[iOperand].sWhere;
                sType  = aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(aoOperands):
            aoOperands.append(Operand(sWhere, sType))
        else:
            oOperand = aoOperands[iOperand];
            if oOperand.sWhere != sWhere or oOperand.sType != sType:
                self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                           % (sMacro, iOperand + 1, iOperand + 1, oOperand.sWhere, oOperand.sType, sWhere, sType,));

    # Encoding.
    if sForm in g_kdIemForms:
        aForm = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = aForm[0];
        elif aForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, aForm, sForm));

        # Check the parameter locations for the encoding.
        if aForm[1] is not None:
            if len(aForm[1]) != len(aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(aForm[1]), len(aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(aForm[1]):
                    if aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, aoOperands[iOperand].sWhere, sWhere, sForm,));
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints are handled first, then the IEM ones.
    for sHints, sPrefix, sFmtUnknown, sFmtExpected in (
            (sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s'),
            (sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s'),
    ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if not sHint.startswith(sPrefix):
                if sHint != '0':
                    self.error(sFmtExpected % (sMacro, sHint,));
                continue;
            sShortHint = sHint[len(sPrefix):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error(sFmtUnknown % (sMacro, sHint,));

    return True;
2990
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx, whose result is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3000
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if an instruction decoder function macro (FNIEMOP_XXX) was
    found and processed, False otherwise (including when IEMOP_MNEMONIC*
    macros were processed).
    """
    # Nothing to scan unless there is a parenthesis beyond the first column.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sMacro    = asArgs[0];
        sFunction = asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub = sMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # Stubs have no body following them, so flush right away.
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None:
        if len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,]
    #                     a_fDisHints, a_fIemHints)
    # The N operands sit between the five fixed arguments and the two hints.
    #
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                        'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX')):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # The N operands sit between the three fixed arguments and the two hints.
    #
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                        'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4')):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    return False;
3094
3095
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine, tracking whether we are in code or
    inside a multi-line comment.  Completed comments are handed to
    self.parseComment() and code fragments to self.checkCodeForMacro().

    Returns the result of self.printErrors() (number of errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                # Scan the line, alternating between code and comment state.
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than sLine[0] so an empty line string is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3161
3162
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read; a ParserException
    from the parser is printed and then re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Other exceptions propagate unchanged.
    #
    try:
        cErrors = SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;

    return cErrors;
3193
3194
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each name in an instruction's asCopyTests is looked up first by
    statistics name and then by function name, and the tests of the
    instruction(s) found are appended to the referencing instruction.

    Returns the number of errors (written to stderr).
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or []):
            # A statistics name identifies one instruction, a function name
            # may be shared by several.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3222
3223
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3235
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files located next to this
    script, then applies the test copying and only-test post processing.

    Returns True on success; calls sys.exit(1) if any errors were counted.
    Raises exception on failure.
    """
    # (default instruction map, file name) pairs, parsed in this order.
    atSources = [
        ( 'one',       'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',     'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',   'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',   'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',   'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',     'IEMAllInstructions3DNow.cpp.h'),
    ];

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    for sDefaultMap, sName in atSources:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors:
        sys.exit(1);
    return True;
3262
3263
3264
# Parse all the instruction files when the module is loaded (both on import
# and when run as a script).  Note that __parseAll exits the process with
# status 1 if any parse errors were counted.
__parseAll();
3266
3267
3268#
3269# Generators (may perhaps move later).
3270#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Writes a DISOPCODE table to oDstFile for the first instruction map (in
    encoding + lead opcode order); only one map is emitted for now.
    """
    def getMapSortKey(aKV):
        """ Sort key for (name, map) pairs: encoding followed by the lead opcodes. """
        return aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes);

    # Start offsets of the output columns.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()), key = getMapSortKey):
        assert oMap.sName == sName;
        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ];

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):
            # A header comment for every group of 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            # Empty slots produce no output, lists are not supported yet.
            if oInstr is None:
                continue;
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
                continue;

            aoOperands = oInstr.aoOperands;
            if len(aoOperands) > 3:
                sMacro       = 'OPVEX';
                cMaxOperands = 4;
            else:
                sMacro       = 'OP';
                cMaxOperands = 3;
            assert len(aoOperands) <= cMaxOperands;

            #
            # Format string.
            #
            asOpFormats = [];
            for oOperand in aoOperands:
                sOpFormat = g_kdOpTypes[oOperand.sType][2];
                if sOpFormat[0] != '%':             ## @todo remove upper() later.
                    sOpFormat = sOpFormat.upper();  ## @todo remove upper() later.
                asOpFormats.append(sOpFormat);
            sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
            if asOpFormats:
                sTmp += ' ' + ','.join(asOpFormats);
            asColumns = [ sTmp + '",', ];

            #
            # Decoders.
            #
            iStart    = len(asColumns);
            sEncoding = oInstr.sEncoding;
            if sEncoding is None or sEncoding == 'fixed':
                pass;
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
                asColumns.append('IDX_ParseModRM,');
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(aoOperands) > 1 and aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif sEncoding == 'prefix':
                asColumns.extend(['0,'] * len(aoOperands));
            elif sEncoding == 'vex2':
                asColumns.append('IDX_ParseVex2b,')
            elif sEncoding == 'vex3':
                asColumns.append('IDX_ParseVex3b,')
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
            else:
                ## @todo
                #IDX_ParseTwoByteEsc,
                #IDX_ParseGrp1,
                #IDX_ParseShiftGrp2,
                #IDX_ParseGrp3,
                #IDX_ParseGrp4,
                #IDX_ParseGrp5,
                #IDX_Parse3DNow,
                #IDX_ParseGrp6,
                #IDX_ParseGrp7,
                #IDX_ParseGrp8,
                #IDX_ParseGrp9,
                #IDX_ParseGrp10,
                #IDX_ParseGrp12,
                #IDX_ParseGrp13,
                #IDX_ParseGrp14,
                #IDX_ParseGrp15,
                #IDX_ParseGrp16,
                #IDX_ParseThreeByteEsc4,
                #IDX_ParseThreeByteEsc5,
                #IDX_ParseModFence,
                #IDX_ParseEscFP,
                #IDX_ParseNopPause,
                #IDX_ParseInvOpModRM,
                assert False, str(oInstr);

            # Check for immediates and stuff in the remaining operands, then
            # pad the decoder columns up to the operand maximum.
            for oOperand in aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0];
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',');
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr);
            asColumns.append(oInstr.sDisEnum + ',');
            iStart = len(asColumns)
            asColumns.extend(['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands]);
            asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

            #
            # Flags: OR together the DISOPTYPE_ defines of the hints, or 0.
            #
            asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
            sFlags    = ' | '.join([sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')]);
            asColumns.append((sFlags or '0') + '),');

            #
            # Format the columns into a line: pad up to the column offset
            # or, if already past it, separate with a single space.
            #
            # The result looks like this (for the OP macro):
            #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            #      DISOPTYPE_HARMLESS),
            #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            #   { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
            #
            sLine = '';
            for iColumn, sColumn in enumerate(asColumns):
                offColumn = aoffColumns[iColumn];
                if len(sLine) < offColumn:
                    sLine = sLine.ljust(offColumn);
                else:
                    sLine += ' ';
                sLine += sColumn;

            asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3434
# When executed as a script, generate the disassembler tables using the
# function's default destination (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3437
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette