VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103254

Last change on this file since 103254 was 103233, checked in by vboxsync, 10 months ago

VMM/IEM: Liveness analysis, part 8: Propagating EFLAGS annotations to the liveness code, asserting that flag modifications are within the annotations, gather some statistics on potential EFLAGS updating gains. bugref:10372 bugref:10375

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 316.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103233 2024-02-07 00:09:53Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103233 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
68g_kdX86EFlagsConstants = {
69 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
70 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
71 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
72 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
73 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
74 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
75 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
76 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
77 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
78 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
79 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
80 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
81 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
82 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
83 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
84 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
85 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
86 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
87 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
88 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
89};
90
91## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
92g_kdEFlagsMnemonics = {
93 # Debugger flag notation (sorted by value):
94 'cf': 'X86_EFL_CF', ##< Carry Flag.
95 'nc': '!X86_EFL_CF', ##< No Carry.
96
97 'po': 'X86_EFL_PF', ##< Parity Odd.
98 'pe': '!X86_EFL_PF', ##< Parity Even.
99
100 'af': 'X86_EFL_AF', ##< Aux Flag.
101 'na': '!X86_EFL_AF', ##< No Aux.
102
103 'zr': 'X86_EFL_ZF', ##< ZeRo.
104 'nz': '!X86_EFL_ZF', ##< No Zero.
105
106 'ng': 'X86_EFL_SF', ##< NeGative (sign).
107 'pl': '!X86_EFL_SF', ##< PLus (sign).
108
109 'tf': 'X86_EFL_TF', ##< Trap flag.
110
111 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
112 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
113
114 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
115 'up': '!X86_EFL_DF', ##< UP (string op direction).
116
117 'ov': 'X86_EFL_OF', ##< OVerflow.
118 'nv': '!X86_EFL_OF', ##< No Overflow.
119
120 'nt': 'X86_EFL_NT', ##< Nested Task.
121 'rf': 'X86_EFL_RF', ##< Resume Flag.
122 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
123 'ac': 'X86_EFL_AC', ##< Alignment Check.
124 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
125 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
126
127 # Reference manual notation not covered above (sorted by value):
128 'pf': 'X86_EFL_PF',
129 'zf': 'X86_EFL_ZF',
130 'sf': 'X86_EFL_SF',
131 'if': 'X86_EFL_IF',
132 'df': 'X86_EFL_DF',
133 'of': 'X86_EFL_OF',
134 'iopl': 'X86_EFL_IOPL',
135 'id': 'X86_EFL_ID',
136};
137
138## Constants and values for CR0.
139g_kdX86Cr0Constants = {
140 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
141 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
142 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
143 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
144 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
145 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
146 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
147 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
148 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
149 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
150 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
151};
152
153## Constants and values for CR4.
154g_kdX86Cr4Constants = {
155 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
156 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
157 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
158 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
159 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
160 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
161 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
162 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
163 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
164 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
165 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
166 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
167 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
168 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
169 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
170 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
171 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
172 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
173};
174
175## XSAVE components (XCR0).
176g_kdX86XSaveCConstants = {
177 'XSAVE_C_X87': 0x00000001,
178 'XSAVE_C_SSE': 0x00000002,
179 'XSAVE_C_YMM': 0x00000004,
180 'XSAVE_C_BNDREGS': 0x00000008,
181 'XSAVE_C_BNDCSR': 0x00000010,
182 'XSAVE_C_OPMASK': 0x00000020,
183 'XSAVE_C_ZMM_HI256': 0x00000040,
184 'XSAVE_C_ZMM_16HI': 0x00000080,
185 'XSAVE_C_PKRU': 0x00000200,
186 'XSAVE_C_LWP': 0x4000000000000000,
187 'XSAVE_C_X': 0x8000000000000000,
188 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
189 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
190};
191
192
193## \@op[1-4] locations
194g_kdOpLocations = {
195 'reg': [], ## modrm.reg
196 'rm': [], ## modrm.rm
197 'imm': [], ## immediate instruction data
198 'vvvv': [], ## VEX.vvvv
199
200 # fixed registers.
201 'AL': [],
202 'rAX': [],
203 'rDX': [],
204 'CL': [],
205 'rSI': [],
206 'rDI': [],
207 'rFLAGS': [],
208 'CS': [],
209 'DS': [],
210 'ES': [],
211 'FS': [],
212 'GS': [],
213 'SS': [],
214
215 # fixed values.
216 '1': [],
217};
218
219## \@op[1-4] types
220##
221## Value fields:
222## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
223## - 1: the location (g_kdOpLocations).
224## - 2: disassembler format string version of the type.
225## - 3: disassembler OP_PARAM_XXX (XXX only).
226## - 4: IEM form matching instruction.
227##
228## Note! See the A.2.1 in SDM vol 2 for the type names.
229g_kdOpTypes = {
230 # Fixed addresses
231 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),
232
233 # ModR/M.rm
234 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
235 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
236 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
237 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
238 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
239 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
240 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
241 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
242 'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
243 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
244 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
245 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
246 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
247 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
248 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
249 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
250 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
251 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
252 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
253 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
254 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
255 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
256 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
257 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
258 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
259 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
260
261 # ModR/M.rm - register only.
262 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
263 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
264 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
265 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
266 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
267 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
268 'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
269 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),
270
271 # ModR/M.rm - memory only.
272 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
273 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
274 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
275 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
276 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
277 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
278 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
279 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
280 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
281 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
282 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
283 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
284 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
285 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
286 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
287
288 # ModR/M.reg
289 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
290 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
291 'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
292 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
293 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
294 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
295 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
296 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
297 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
298 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
299 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
300 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
301 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
302 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
303 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
304 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
305 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
306 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
307 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
308 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
309 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
310 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
311 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
312 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
313 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
314 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
315 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
316 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
317 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
318 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
319 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
320 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
321
322 # VEX.vvvv
323 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
324 'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
325 'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
326 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
327 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
328 'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
329 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
330 'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),
331
332 # Immediate values.
333 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
334 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
335 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
336 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
337 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
338 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword
339
340 # Address operands (no ModR/M).
341 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
342 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),
343
344 # Relative jump targets
345 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
346 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),
347
348 # DS:rSI
349 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
350 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
351 # ES:rDI
352 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
353 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),
354
355 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),
356
357 # Fixed registers.
358 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
359 'REG_CL': ( 'IDX_ParseFixedReg', 'CL', 'cl', 'REG_CL', '', ),
360 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
361 'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
362 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
363 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
364 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
365 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
366 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
367 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
368
369 # Fixed values.
370 '1': ( '', '1', '1', '1', '', ),
371};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
377## IEMFORM_XXX mappings.
378g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
379 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
380 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
381 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
382 'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
383 'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
384 'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
385 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
386 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
387 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
388 'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
389 'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
390 'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
391 'M': ( 'ModR/M', [ 'rm', ], '', ),
392 'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
393 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
394 'M1': ( 'ModR/M', [ 'rm', '1' ], '', ),
395 'M_CL': ( 'ModR/M', [ 'rm', 'CL' ], '', ), # shl/rcl/ror/++
396 'MI': ( 'ModR/M', [ 'rm', 'imm' ], '', ),
397 'MI_REG': ( 'ModR/M', [ 'rm', 'imm' ], '11 mr/reg', ),
398 'MI_MEM': ( 'ModR/M', [ 'rm', 'imm' ], '!11 mr/reg', ),
399 'R': ( 'ModR/M', [ 'reg', ], '', ),
400
401 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
402 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
403 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
404 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
405 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
406 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
407 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
408 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
409 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
410 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
411 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
412 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
413 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
414 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
415 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
416 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
417 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
418 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
419 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
420 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
421 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
422 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),
423
424 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
425 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
426 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
427 'VEX_VMI': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '', ),
428 'VEX_VMI_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '11 mr/reg', ),
429 'VEX_VMI_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '!11 mr/reg', ),
430
431 'FIXED': ( 'fixed', None, '', ),
432};
433
434## \@oppfx values.
435g_kdPrefixes = {
436 'none': [],
437 '0x66': [],
438 '0xf3': [],
439 '0xf2': [],
440 '!0xf3': [], # special case for bsf/tzcnt
441};
442
443## Special \@opcode tag values.
444g_kdSpecialOpcodes = {
445 '/reg': [],
446 'mr/reg': [],
447 '11 /reg': [],
448 '!11 /reg': [],
449 '11 mr/reg': [],
450 '!11 mr/reg': [],
451};
452
## Special \@opcodesub tag values.
##
## Each value is a two element list:
##   - 0: the real (canonical) value; this differs from the key only for
##        the entries marked as aliases.
##   - 1: the value for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    # Not an alias: the canonical value must be the key itself, like every
    # other non-alias entry.  (It used to read 'vex.l=0', which turned
    # vex.l=1 instructions into vex.l=0 ones.)
    'vex.l=1':              [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',     ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',     ],
};
475
476## Valid values for \@openc
477g_kdEncodings = {
478 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
479 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
480 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
481 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
482 'prefix': [ None, ], ##< Prefix
483};
484
485## \@opunused, \@opinvalid, \@opinvlstyle
486g_kdInvalidStyles = {
487 'immediate': [], ##< CPU stops decoding immediately after the opcode.
488 'vex.modrm': [], ##< VEX+ModR/M, everyone.
489 'intel-modrm': [], ##< Intel decodes ModR/M.
490 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
491 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
492 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
493};
494
495g_kdCpuNames = {
496 '8086': (),
497 '80186': (),
498 '80286': (),
499 '80386': (),
500 '80486': (),
501};
502
## \@opcpuid
##
## Maps the lower-case feature names accepted by the tag to the name of the
## corresponding X86_CPUID_XXX feature bit constant.
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' used to be mapped to the
##               constants of their neighbouring CPUID.1:ECX bits (DTES64,
##               CPLDS and TM2 respectively).  The names below assume the
##               usual X86_CPUID_FEATURE_ECX_XXX naming - confirm against the
##               x86.h header.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
548
549## \@ophints values.
550# pylint: disable=line-too-long
551g_kdHints = {
552 'invalid': 'DISOPTYPE_INVALID', ##<
553 'harmless': 'DISOPTYPE_HARMLESS', ##<
554 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
555 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
556 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
557 'portio': 'DISOPTYPE_PORTIO', ##<
558 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
559 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
560 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
561 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
562 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
563 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
564 'illegal': 'DISOPTYPE_ILLEGAL', ##<
565 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
566 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
567 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
568 'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
569 'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
570 'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
571 'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
572 'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
573 'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
574 'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
575 'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
576 'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
577 ## (only in 16 & 32 bits mode!)
578 'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
579 'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
580 'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
581 'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
582 'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
583 'ignores_rexw': '', ##< Ignores REX.W.
584 'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
585 'vex_l_zero': '', ##< VEX.L must be 0.
586 'vex_l_ignored': '', ##< VEX.L is ignored.
587 'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
588 'lock_allowed': '', ##< Lock prefix allowed.
589};
590# pylint: enable=line-too-long
591
592## \@opxcpttype values (see SDMv2 2.4, 2.7).
593g_kdXcptTypes = {
594 'none': [],
595 '1': [],
596 '2': [],
597 '3': [],
598 '4': [],
599 '4UA': [],
600 '5': [],
601 '5LZ': [], # LZ = VEX.L must be zero.
602 '6': [],
603 '7': [],
604 '7LZ': [],
605 '8': [],
606 '11': [],
607 '12': [],
608 'E1': [],
609 'E1NF': [],
610 'E2': [],
611 'E3': [],
612 'E3NF': [],
613 'E4': [],
614 'E4NF': [],
615 'E5': [],
616 'E5NF': [],
617 'E6': [],
618 'E6NF': [],
619 'E7NF': [],
620 'E9': [],
621 'E9NF': [],
622 'E10': [],
623 'E11': [],
624 'E12': [],
625 'E12NF': [],
626};
627
628
629def _isValidOpcodeByte(sOpcode):
630 """
631 Checks if sOpcode is a valid lower case opcode byte.
632 Returns true/false.
633 """
634 if len(sOpcode) == 4:
635 if sOpcode[:2] == '0x':
636 if sOpcode[2] in '0123456789abcdef':
637 if sOpcode[3] in '0123456789abcdef':
638 return True;
639 return False;
640
641
642class InstructionMap(object):
643 """
644 Instruction map.
645
646 The opcode map provides the lead opcode bytes (empty for the one byte
647 opcode map). An instruction can be member of multiple opcode maps as long
648 as it uses the same opcode value within the map (because of VEX).
649 """
650
651 kdEncodings = {
652 'legacy': [],
653 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
654 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
655 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
656 'xop8': [], ##< XOP prefix with vvvvv = 8
657 'xop9': [], ##< XOP prefix with vvvvv = 9
658 'xop10': [], ##< XOP prefix with vvvvv = 10
659 };
660 ## Selectors.
661 ## 1. The first value is the number of table entries required by a
662 ## decoder or disassembler for this type of selector.
663 ## 2. The second value is how many entries per opcode byte if applicable.
664 kdSelectors = {
665 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
666 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
667 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
668 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
669 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
670 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
671 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
672 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
673 };
674
675 ## Define the subentry number according to the Instruction::sPrefix
676 ## value for 'byte+pfx' selected tables.
677 kiPrefixOrder = {
678 'none': 0,
679 '0x66': 1,
680 '0xf3': 2,
681 '0xf2': 3,
682 };
683
684 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
685 sEncoding = 'legacy', sDisParse = None):
686 assert sSelector in self.kdSelectors;
687 assert sEncoding in self.kdEncodings;
688 if asLeadOpcodes is None:
689 asLeadOpcodes = [];
690 else:
691 for sOpcode in asLeadOpcodes:
692 assert _isValidOpcodeByte(sOpcode);
693 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
694
695 self.sName = sName;
696 self.sIemName = sIemName;
697 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
698 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
699 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
700 self.aoInstructions = [] # type: Instruction
701 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
702
703 def copy(self, sNewName, sPrefixFilter = None):
704 """
705 Copies the table with filtering instruction by sPrefix if not None.
706 """
707 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
708 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
709 else self.sSelector,
710 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
711 if sPrefixFilter is None:
712 oCopy.aoInstructions = list(self.aoInstructions);
713 else:
714 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
715 return oCopy;
716
717 def getTableSize(self):
718 """
719 Number of table entries. This corresponds directly to the selector.
720 """
721 return self.kdSelectors[self.sSelector][0];
722
723 def getEntriesPerByte(self):
724 """
725 Number of table entries per opcode bytes.
726
727 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
728 the others it will just return 1.
729 """
730 return self.kdSelectors[self.sSelector][1];
731
732 def getInstructionIndex(self, oInstr):
733 """
734 Returns the table index for the instruction.
735 """
736 bOpcode = oInstr.getOpcodeByte();
737
738 # The byte selectors are simple. We need a full opcode byte and need just return it.
739 if self.sSelector == 'byte':
740 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
741 return bOpcode;
742
743 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
744 if self.sSelector == 'byte+pfx':
745 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
746 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
747 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
748
749 # The other selectors needs masking and shifting.
750 if self.sSelector == '/r':
751 return (bOpcode >> 3) & 0x7;
752
753 if self.sSelector == 'mod /r':
754 return (bOpcode >> 3) & 0x1f;
755
756 if self.sSelector == 'memreg /r':
757 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
758
759 if self.sSelector == '!11 /r':
760 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
761 return (bOpcode >> 3) & 0x7;
762
763 if self.sSelector == '11 /r':
764 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
765 return (bOpcode >> 3) & 0x7;
766
767 if self.sSelector == '11':
768 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
769 return bOpcode & 0x3f;
770
771 assert False, self.sSelector;
772 return -1;
773
def getInstructionsInTableOrder(self):
    """
    Get instructions in table order.

    Returns an array with getTableSize() entries.  Normally there is exactly
    one instruction per entry.  An entry is None when no instruction was
    specified for that opcode value, and it is a list of instructions when
    several of them share the index (special encodings where for instance a
    prefix like REX.W encodes a different instruction, or different CPUs have
    different instructions or prefixes in the same place).

    Instructions without an opcode (sOpcode unset/empty) are left out.
    """
    cEntries = self.getTableSize()
    aoSlots = [None] * cEntries

    for oCur in self.aoInstructions:
        if not oCur.sOpcode:
            continue

        idxSlot = self.getInstructionIndex(oCur)
        assert idxSlot < cEntries, str(idxSlot)

        oOccupant = aoSlots[idxSlot]
        if isinstance(oOccupant, list):
            oOccupant.append(oCur)              # Third or later instruction in this slot.
        elif oOccupant is None:
            aoSlots[idxSlot] = oCur             # First instruction in this slot.
        else:
            aoSlots[idxSlot] = [oOccupant, oCur]    # Second one: promote the slot to a list.

    return aoSlots
805
806
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_' separated word of
    self.sName: the 'm' and 'r' suffix words and two digit lower case hex
    words are appended as '_<word>', everything else is appended with 'grp'
    and 'map' capitalized and the first character upper-cased.
    """
    asPieces = ['g_aDisas']
    for sPiece in self.sName.split('_'):
        if sPiece in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sPiece)
        elif len(sPiece) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPiece):
            asPieces.append('_' + sPiece)
        else:
            sPiece = sPiece.replace('grp', 'Grp').replace('map', 'Map')
            asPieces.append(sPiece[0].upper() + sPiece[1:])
    return ''.join(asPieces)
824
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with 'g_aDisas' replaced
    by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName()
    sBaseName = sTableName.replace('g_aDisas', 'g_Disas')
    return sBaseName + 'Range'
830
def isVexMap(self):
    """ Returns True if this is a VEX map, i.e. the map encoding starts with 'vex'. """
    sEncoding = self.sEncoding
    return sEncoding.startswith('vex')
834
835
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes  # Normal sizes (in bytes).
        self.fUnsigned = fUnsigned

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage)
            self.sMessage = sMessage

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    }

    @staticmethod
    def _hexDigits(uValue):
        """
        Returns the lower case hex digits of the non-negative integer uValue,
        without the '0x' prefix and without the 'L' suffix Python 2 puts on
        long integers.
        """
        sHex = hex(uValue).rstrip('L')
        assert sHex[:2] == '0x'
        return sHex[2:]

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or 0x-prefixed hexadecimal string, optionally with
        a leading '+' or '-' which forces sign extension.  The bytes are
        returned least significant byte first, padded up to the smallest entry
        of self.acbSizes that fits (or to a multiple of the largest entry when
        the value is bigger than that).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  (This base
        implementation only ever returns the first form.)

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value')

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned
        if sValue[0] in ('-', '+'):
            fSignExtend = True
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x'
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x'

        # Try convert it to an integer.  int() is used rather than long() so
        # this works on Python 3 as is; Python 2 promotes to long on its own.
        try:
            iValue = int(sValue, 16 if fHex else 10)
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt))

        # Convert to a hex string.  Negative values need to be manually
        # converted to two's complement: the digits of (-n - 1) are inverted,
        # which is the same as ~(-n - 1) = n.
        if iValue >= 0:
            sHex = self._hexDigits(iValue)
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._hexDigits(-iValue - 1)])
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex   # Make sure the top bit of the leading digit is set.

        # Figure out how many digits the natural size takes.
        cDigits    = len(sHex)
        cMaxDigits = self.acbSizes[-1] * 2
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2
                if cDigits <= cNaturalDigits:
                    break
        else:
            # Too big for the largest size: round up to a multiple of it.
            cNaturalDigits = (cDigits + cMaxDigits - 1) // cMaxDigits * cMaxDigits

        # Pad on the left, using 'f' for negative values to keep the sign.
        cNeeded = cNaturalDigits - cDigits
        if cNeeded:
            sHex = ('0' if iValue >= 0 else 'f') * cNeeded + sHex

        # Walk the digit pairs from the end so the result is least significant byte first.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)])

        return ((fSignExtend, abValue),)

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue)
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage
        return True

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue
        return False
960
961
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of flag mnemonics which are looked up
    in g_kdEFlagsMnemonics.  Mnemonics mapping to a '!' prefixed constant mean
    the flag is to be cleared, the others that it is to be set.
    """

    ## The mnemonics indicating a flag being zero (cleared).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)

    def get(self, sValue):
        """
        Converts the flag list in sValue into byte representation(s).

        Returns the TestType.get() result for the mask of flags to set when no
        flag is to be cleared, otherwise a pair with the entry for the mask of
        flags to clear first and the entry for the mask of flags to set second.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0
        fSet   = 0
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None)
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]]
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant]

        aoSet = TestType.get(self, '0x%x' % (fSet,))
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True
            return (aoClear[0], aoSet[0])
        assert self.isAndOrPair(sValue) is False
        return aoSet

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether any of its flags is one of
        the zero value mnemonics.

        The value is split on ',' exactly like get() does it, so that a flag
        name merely containing one of the two letter mnemonics as a substring
        isn't mistaken for it.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True
        return False
997
class TestTypeFromDict(TestType):
    """
    Value parsing for registers whose bits are named by a constant dictionary.

    The value is a comma separated list of flag names.  Each name is upper-cased,
    prefixed with sConstantPrefix and looked up in kdConstantsAndValues; the
    values found are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)
        self.sConstantPrefix      = sConstantPrefix
        self.kdConstantsAndValues = kdConstantsAndValues

    def get(self, sValue):
        """
        Converts the flag list in sValue to the TestType.get() representation
        of the combined value.  Raises BadValue on unknown flag names.
        """
        fCombined = 0
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper()
            fBits = self.kdConstantsAndValues.get(sConstant, None)
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits
        return TestType.get(self, '0x%x' % (fCombined,))
1018
1019
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance simply records the field name, the assignment operator, the
    value string and the value type name it was constructed with.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ]
    ## Types
    # NOTE(review): 'int' is created with the TestType default fUnsigned=True, so
    #               it currently behaves exactly like 'uint' - confirm whether
    #               fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Old misspelling of 'xmm15.dw0', kept for backward compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records the modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all four
        arguments must be strings.  This is only checked by assertions.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1285
1286
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet; the tables
    below define which variables exist, which values each of them accepts and
    which shorthand predicates translate into such triplets.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    }

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bitness.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Records the selector triplet.  The variable, operator and value are
        checked against the tables above by assertions only.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue
1383
1384
1385class InstructionTest(object):
1386 """
1387 Instruction test.
1388 """
1389
1390 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1391 self.oInstr = oInstr # type: InstructionTest
1392 self.aoInputs = [] # type: List[TestInOut]
1393 self.aoOutputs = [] # type: List[TestInOut]
1394 self.aoSelectors = [] # type: List[TestSelector]
1395
1396 def toString(self, fRepr = False):
1397 """
1398 Converts it to string representation.
1399 """
1400 asWords = [];
1401 if self.aoSelectors:
1402 for oSelector in self.aoSelectors:
1403 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1404 asWords.append('/');
1405
1406 for oModifier in self.aoInputs:
1407 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1408
1409 asWords.append('->');
1410
1411 for oModifier in self.aoOutputs:
1412 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1413
1414 if fRepr:
1415 return '<' + ' '.join(asWords) + '>';
1416 return ' '.join(asWords);
1417
1418 def __str__(self):
1419 """ Provide string represenation. """
1420 return self.toString(False);
1421
1422 def __repr__(self):
1423 """ Provide unambigious string representation. """
1424 return self.toString(True);
1425
class Operand(object):
    """
    Instruction operand.

    Described by where it is encoded (a g_kdOpLocations key) and its type (a
    g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere
        assert sType  in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with 'E', 'G' or 'M'. """
        chKind = self.sType[0]
        return chKind in ('E', 'G', 'M')
1440
1441
1442
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds the core description of one instruction (mnemonic, maps, operands,
    opcode, flags, CPU requirements, tests), implementation details (stats
    and function names, stub indicators), where it was found in the source,
    and some raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent            = None      # type: Instruction
        self.sMnemonic          = None
        self.sBrief             = None
        self.asDescSections     = []        # type: List[str]
        self.aoMaps             = []        # type: List[InstructionMap]
        self.aoOperands         = []        # type: List[Operand]
        self.sPrefix            = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode            = None      # type: str
        self.sSubOpcode         = None      # type: str
        self.sEncoding          = None
        self.asFlTest           = None
        self.asFlModify         = None
        self.asFlUndefined      = None
        self.asFlSet            = None
        self.asFlClear          = None
        self.dHints             = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum           = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds           = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures      = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests            = []        # type: List[InstructionTest]
        self.sMinCpu            = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr           = None      ##< Some CPU restriction expression...
        self.sGroup             = None
        self.fUnused            = False     ##< Unused instruction.
        self.fInvalid           = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle      = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType          = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats             = None
        self.sFunction          = None
        self.fStub              = False
        self.fUdStub            = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile           = sSrcFile
        self.iLineCreated       = iLine
        self.iLineCompleted     = None
        self.cOpTags            = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo        = None
        self.asRawDisParams     = []
        self.sRawIemOpFlags     = None
        self.sRawOldOpcodes     = None
        self.asCopyTests        = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs for the
        fields that are set, wrapped in '<>' when fRepr is True.
        """
        # (name, value) pairs in output order.  A value of None means the
        # field is left out, which is also how the optional ones are skipped.
        atFields = [
            ('opcode',           self.sOpcode),
            ('prefix',           self.sPrefix if self.sPrefix else None),
            ('mnemonic',         self.sMnemonic),
        ]
        for iOperand, oOperand in enumerate(self.aoOperands):
            atFields.append(('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)))
        atFields += [
            ('maps',             ','.join([oMap.sName for oMap in self.aoMaps]) if self.aoMaps else None),
            ('encoding',         self.sEncoding),
            ('hints',            ','.join(self.dHints.keys()) if self.dHints else None),
            ('disenum',          self.sDisEnum),
            ('cpuid',            ','.join(self.asCpuIds) if self.asCpuIds else None),
            ('group',            self.sGroup),
            ('unused',           'True' if self.fUnused else None),
            ('invalid',          'True' if self.fInvalid else None),
            ('invlstyle',        self.sInvalidStyle),
            ('fltest',           self.asFlTest),
            ('flmodify',         self.asFlModify),
            ('flundef',          self.asFlUndefined),
            ('flset',            self.asFlSet),
            ('flclear',          self.asFlClear),
            ('mincpu',           self.sMinCpu),
            ('stats',            self.sStats),
            ('sFunction',        self.sFunction),
            ('fStub',            'True' if self.fStub else None),
            ('fUdStub',          'True' if self.fUdStub else None),
            ('optags',           str(self.cOpTags) if self.cOpTags else None),
            ('FNIEMOP_XXX',      str(self.iLineFnIemOpMacro) if self.iLineFnIemOpMacro != -1 else None),
            ('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro) if self.iLineMnemonicMacro != -1 else None),
        ]

        sBody = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in atFields if sValue is not None])
        if fRepr:
            return '<' + sBody + '>'
        return sBody

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as parent.  The map list, opcode, sub-opcode
        and prefix are taken from the arguments when given (true values),
        otherwise from this object.  List and dictionary attributes are
        shallow copies (except for the flag lists, which are shared), the rest
        is referenced as is.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated)
        oCopy.oParent = self

        # The bits which can be overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps)
        oCopy.sPrefix    = sPrefix if sPrefix else self.sPrefix
        oCopy.sOpcode    = sOpcode if sOpcode else self.sOpcode
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode

        # Attributes carried over by reference.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr))

        # Containers getting a new (shallow) container of their own.
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)))
        oCopy.dHints = dict(self.dHints)

        return oCopy

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms are '0xNN' (returned as is), '/r' (r in bits 5:3),
        '11/r' (ditto with 0xc0 ORed in) and '!11/r' (ditto with 0x80 ORed in).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sSpec = str(self.sOpcode)   # pylint type confusion workaround.

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16)

        # The remaining forms all end with the /r digit.
        chLast = sSpec[-1:]
        if chLast.isdigit():
            cchSpec = len(sSpec)

            # The /r form:
            if cchSpec == 2 and sSpec[0] == '/':
                return int(chLast) << 3

            # The 11/r form:
            if cchSpec == 4 and sSpec.startswith('11/'):
                return (int(chLast) << 3) | 0xc0

            # The !11/r form (returns mod=1):
            ## @todo this doesn't really work...
            if cchSpec == 5 and sSpec.startswith('!11/'):
                return (int(chLast) << 3) | 0x80

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        uMask = 0
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag]
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant]
        return uMask

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear)

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style ('0' if
        None or empty).
        """
        if not asFlags:
            return '0'
        asConstants = []
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag]
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant)
        return ' | '.join(asConstants)

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest)

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify)

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined)

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet)

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear)

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False
        return all(oMap.isVexMap() for oMap in self.aoMaps)
1715
1716
1717
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
## NOTE(review): keys are presumably the statistics name strings - confirm against the code filling it.
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list, as the type comment indicates several instructions may share one function name.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1729
## Instruction maps.
##
## Each entry is constructed from a map name, optionally followed by a second
## positional argument (presumably the IEM decoder table name, see sIemName
## below - confirm against InstructionMap.__init__), the lead opcode bytes,
## a selector string and/or an encoding name.
g_aoInstructionMaps = [
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (InstructionMap.sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps indexed by InstructionMap.sIemName.  When several maps
## share an sIemName value, the last one in g_aoInstructionMaps wins.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1795
1796
1797#
1798# Decoder functions.
1799#
1800
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function is defined and the raw source lines it
    consists of.  This is mainly used for scoping searches for variables used
    in microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        # Location of the function.
        self.sSrcFile   = sSrcFile;     ##< The source file the function is defined in.
        self.iBeginLine = iBeginLine;   ##< The start line.
        self.iEndLine   = -1;           ##< The line the function (probably) ends on; -1 until complete() is called.
        # Identity of the function.
        self.sName      = sName;        ##< The function name.
        self.asDefArgs  = asDefArgs;    ##< The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        # Content, filled in by complete().
        self.asLines    = [] # type: List[str] ##< The raw lines the function is made up of.

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording the end line and the raw lines.

        May only be called once, this is asserted via the -1 end line marker.
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1823
1824
1825#
1826# "Microcode" statements and blocks
1827#
1828
class McStmt(object):
    """
    Statement in a microcode block.

    This is the base class.  It renders as 'NAME(param, param, ...);' and
    supplies static helpers for rendering and searching statement lists.
    """
    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters.
        self.oUser    = None;       ##< Initialized to None; not used by this class itself.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement, indented by cchIndent spaces and
        terminated by a newline.
        """
        sArgs = ', '.join(self.asParams);
        return ''.join((' ' * cchIndent, self.sName, '(', sArgs, ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, returning the concatenated code.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns first statement with any of the given names from the list,
        None if there is no such statement.

        The branches of conditional statements are searched recursively, the
        IF branch before the ELSE branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            oHit = McStmt.findStmtByNames(oStmt.aoIfBranch, dNames) \
                or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames);
            if oHit:
                return oHit;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. the name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1873
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Holds the statements of the IF branch and of the optional ELSE branch.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The given branch lists are copied rather than referenced.
        self.aoIfBranch            = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        self.oIfBranchAnnotation   = None;  ##< User specific IF-branch annotation.
        self.oElseBranchAnnotation = None;  ##< User specific ELSE-branch annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches.

        The IEM_MC_ELSE part is only emitted when the ELSE branch is not
        empty.  Branch statements are indented four spaces more than the
        conditional itself.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1893
class McStmtVar(McStmt):
    """
    Variable declaration statement.

    Used for IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN and IEM_MC_LOCAL_CONST.
    """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        self.sVarName = sVarName;   ##< The variable name.
        self.sType    = sType;      ##< The variable type.
        self.sValue   = sValue;     ##< None if no assigned / const value.
1901
class McStmtArg(McStmtVar):
    """
    Argument declaration statement.

    Used for IEM_MC_ARG, IEM_MC_ARG_CONST and IEM_MC_ARG_LOCAL_REF.  The
    constant value, if any, is stored as the McStmtVar value.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        super(McStmtArg, self).__init__(sName, asParams, sType, sVarName, sConstValue);
        self.iArg     = iArg;       ##< The argument number.
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        assert sRefType in ('none', 'local');
1910
class McStmtCall(McStmt):
    """
    Call statement (IEM_MC_CALL_*).

    iFnParam is the index of the function name in asParams, iRcNameParam the
    index of the return code variable name (negative if there is none).
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index of the function name parameter.
        self.idxParams = iFnParam + 1;          ##< Index of the parameter following the function name.
        self.sFn       = asParams[iFnParam];    ##< The function name.
        ## The return code variable name (despite the 'i' prefix), None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1919
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The two parameters are the tested and the modified flags of the given
    instruction, both in C style.
    """
    def __init__(self, oInstruction):
        sTestedFlags   = oInstruction.getTestedFlagsCStyle();
        sModifiedFlags = oInstruction.getModifiedFlagsCStyle();
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', [sTestedFlags, sModifiedFlags,]);
1927
1928
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is kept as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code with an annotation comment appended.

        Note! The annotation is inserted in front of every newline, so each
              line of multi-line code gets it.
        """
        sAnnotatedEol = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sLine = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sLine.replace('\n', sAnnotatedEol);
1946
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        # asParams[0] is the function name at this point, the arguments follow it.
        self.asParams += asArgs;

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'fn(arg, arg, ...);' followed by the decode/normal
        annotation comment.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sIndent     = ' ' * (cchIndent + self.cchIndent);
        sArgs       = ', '.join(self.asParams[1:]);
        return sIndent + self.asParams[0] + '(' + sArgs + ');' + sAnnotation;
1966
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is 'C++/if' and the condition expression is the one
    and only parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines.

        The else part is only emitted when the ELSE branch is not empty.
        Branch statements are indented four spaces more than the 'if'.
        """
        cchTotal    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchTotal;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchTotal + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend((
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchTotal + 4),
                sIndent + '}\n',
            ));
        return ''.join(asParts);
1989
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    The statement name is 'C++/preproc' and fDecode is always False.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive as-is plus a newline.  The indentation is
        ignored, so the directive always starts in the first column.
        """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1999
2000
## IEM_MC_F_XXX values.
## The key is the flag name, the value a tuple of further flags that get set
## along with it (see McBlock.parseMcBegin).
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
## The key is the flag name, the value a tuple of further flags that get set
## along with it (see McBlock.parseCImplFlags).
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2044class McBlock(object):
2045 """
2046 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2047 """
2048
2049 ## @name Macro expansion types.
2050 ## @{
2051 kiMacroExp_None = 0;
2052 kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
2053 kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
2054 ## @}
2055
2056 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
2057 oInstruction = None, cchIndent = None, fDeferToCImpl = False):
2058 ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
2059 self.fDeferToCImpl = fDeferToCImpl;
2060 ## The source file containing the block.
2061 self.sSrcFile = sSrcFile;
2062 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
2063 self.iBeginLine = iBeginLine;
2064 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
2065 self.offBeginLine = offBeginLine;
2066 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2067 self.iEndLine = -1;
2068 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2069 self.offEndLine = 0;
2070 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2071 self.offAfterEnd = 0;
2072 ## The function the block resides in.
2073 self.oFunction = oFunction;
2074 ## The name of the function the block resides in. DEPRECATED.
2075 self.sFunction = oFunction.sName;
2076 ## The block number within the function.
2077 self.iInFunction = iInFunction;
2078 ## The instruction this block is associated with - can be None.
2079 self.oInstruction = oInstruction # type: Instruction
2080 ## Indentation level of the block.
2081 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2082 ## The raw lines the block is made up of.
2083 self.asLines = [] # type: List[str]
2084 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2085 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2086 self.iMacroExp = self.kiMacroExp_None;
2087 ## IEM_MC_BEGIN: Argument count.
2088 self.cArgs = -1;
2089 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2090 self.aoArgs = [] # type: List[McStmtArg]
2091 ## IEM_MC_BEGIN: Locals count.
2092 self.cLocals = -1;
2093 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2094 self.aoLocals = [] # type: List[McStmtVar]
2095 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2096 self.dsMcFlags = {} # type: Dict[str, bool]
2097 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2098 self.dsCImplFlags = {} # type: Dict[str, bool]
2099 ## Decoded statements in the block.
2100 self.aoStmts = [] # type: List[McStmt]
2101
2102 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
2103 """
2104 Completes the microcode block.
2105 """
2106 assert self.iEndLine == -1;
2107 self.iEndLine = iEndLine;
2108 self.offEndLine = offEndLine;
2109 self.offAfterEnd = offAfterEnd;
2110 self.asLines = asLines;
2111
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is the block's begin line plus the number of newlines
    preceding off, the reported column is one-based.
    """
    cNewlinesBefore = sRawCode.count('\n', 0, off);
    offLineStart    = sRawCode.rfind('\n', 0, off) + 1;    # rfind returns -1 if on the first line => 0.
    iColumn         = off - offLineStart + 1;
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cNewlinesBefore, iColumn, sMessage,));
2118
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException).

    The location given is the first line of the block, not the line of the
    statement itself.
    """
    tArgs = (self.sSrcFile, self.iBeginLine, sName, sMessage,);
    raise ParserException('%s:%d: %s: parsing error: %s' % tArgs);
2122
2123 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
2124 """ Check the parameter count, raising an error it doesn't match. """
2125 if len(asParams) != cParamsExpected:
2126 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
2127 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
2128 return True;
2129
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmt object.

    The block (oSelf) is not used, it is only there so the signature matches
    the other statement parsers.
    """
    oStmt = McStmt(sName, asParams);
    _ = oSelf;
    return oStmt;
2135
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmtCond object.

    The block (oSelf) is not used, it is only there so the signature matches
    the other statement parsers.
    """
    oStmt = McStmtCond(sName, asParams);
    _ = oSelf;
    return oStmt;
2141
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The four parameters are the argument count, the locals count, the
    IEM_MC_F_XXX flags and the IEM_CIMPL_F_XXX flags.  The counts and flags
    are recorded in the block (oSelf).  A statement error is raised if the
    statement is used more than once in the block or a flag is unknown.

    Returns a plain McStmt.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;

    fFirstUse = oSelf.cArgs == -1 and oSelf.cLocals == -1 and not oSelf.dsMcFlags;
    if not fFirstUse:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    oSelf.cArgs   = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    # The MC flags: Each flag drags in the flags it implies (g_kdMcFlags).
    if sMcFlags != '0':
        for sFlag in (sRawFlag.strip() for sRawFlag in sMcFlags.split('|')):
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            oSelf.dsMcFlags.update(dict.fromkeys(g_kdMcFlags[sFlag], True));

    # The CIMPL flags.
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2164
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """
    IEM_MC_ARG

    Parameters: type, variable name, argument number.
    Returns the McStmtArg, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmtArg = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmtArg);
    return oStmtArg;
2172
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """
    IEM_MC_ARG_CONST

    Parameters: type, variable name, constant value, argument number.
    Returns the McStmtArg, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sConstValue, sArgNo) = asParams;
    oStmtArg = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sConstValue);
    oSelf.aoArgs.append(oStmtArg);
    return oStmtArg;
2180
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_REF

    Parameters: type, variable name, referenced local, argument number.
    Returns the McStmtArg, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalRef, sArgNo) = asParams;
    oStmtArg = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalRef, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);
    return oStmtArg;
2188
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS

    Parameters: argument (pointer) variable name, local variable name,
    argument number.

    Note! We split this one up into IEM_MC_LOCAL and IEM_MC_ARG_LOCAL_REF.

    Returns a (local statement, argument statement) tuple.  The two are also
    appended to the block's aoLocals and aoArgs respectively.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    # The uint32_t local holding the flags.
    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    # The argument referencing the local.
    asArgParams = ['uint32_t *', sArgName, sLocalName, sArgNo];
    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', asArgParams, 'uint32_t *', sArgName, int(sArgNo),
                         sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2200
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS

    Takes no parameters.

    Note! Translated to an IEM_MC_ARG_CONST declaring pXState as argument 0.

    Returns the McStmtArg, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    sType    = 'PX86XSAVEAREA';
    sVarName = 'pXState';
    sValue   = '&pVCpu->cpum.GstCtx.XState';
    oStmtArg = McStmtArg('IEM_MC_ARG_CONST', [sType, sVarName, sValue, '0'], sType, sVarName, 0, sValue);
    oSelf.aoArgs.append(oStmtArg);
    return oStmtArg;
2210
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL

    Parameters: type, variable name.
    Returns the McStmtVar, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmtVar = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmtVar);
    return oStmtVar;
2218
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_ASSIGN

    Parameters: type, variable name, assigned value.
    Returns the McStmtVar, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssignedValue) = asParams;
    oStmtVar = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssignedValue);
    oSelf.aoLocals.append(oStmtVar);
    return oStmtVar;
2226
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_CONST

    Parameters: type, variable name, constant value.
    Returns the McStmtVar, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConstValue) = asParams;
    oStmtVar = McStmtVar(sName, asParams, sType, sVarName, sValue = sConstValue);
    oSelf.aoLocals.append(oStmtVar);
    return oStmtVar;
2234
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The argument count is the last character of the statement name.  There
    are two parameters in addition to the arguments: The return code variable
    (index 0) and the function (index 1).
    """
    cParamsExpected = 2 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, 1, 0);
2241
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The argument count is the last character of the statement name.  The
    function is the first parameter and is followed by the arguments.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, 0);
2248
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The argument count is the last character of the statement name.  The
    function is the first parameter and is followed by the arguments.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, 0);
2255
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The argument count is the last character of the statement name.  The
    function is the first parameter and is followed by the arguments.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, 0);
2262
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The argument count is the last character of the statement name.  The
    function is the first parameter and is followed by the arguments.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, 0);
2269
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The argument count is the last character of the statement name.  The
    function is the first parameter and is followed by the arguments.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, 0);
2276
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl to
    validate and merge a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sName is the statement name (used for error reporting), sFlags the raw
    flag expression: flag names separated by '|', optionally with comments
    and enclosing parentheses.  A '0' element is ignored.  Each flag also
    sets the flags it implies according to g_kdCImplFlags.

    Returns None.  Raises a statement error (ParserException) if a flag is
    unknown, which includes empty elements like the one after a trailing '|'.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        #print('debug: %s: %s' % (self.oFunction.sName,' | '.join(''.join(sFlags.split()).split('|')),));
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Use startswith/endswith rather than indexing here, as the element
            # may be empty (either from the start or after dropping the '(').
            if sFlag.startswith('('): sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):   sFlag = sFlag[:-1].strip();
            #print('debug: %s' % sFlag)
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2298
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The argument count is the last character of the statement name.  There
    are three parameters in addition to the arguments: The IEM_CIMPL_F_XXX
    flags come first (merged into the block's dsCImplFlags) and the function
    is at index 2.
    """
    cParamsExpected = 3 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, 2);
2306
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The argument count is the fifth last character of the statement name,
    i.e. the digit in front of '_RET'.  There are three parameters in
    addition to the arguments: The IEM_CIMPL_F_XXX flags come first (merged
    into the block's dsCImplFlags) and the function is at index 2.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    #print('debug: %s, %s,...' % (sName, asParams[0],));
    cParamsExpected = 3 + int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, 2);
2316
2317 @staticmethod
2318 def stripComments(sCode):
2319 """ Returns sCode with comments removed. """
2320 off = 0;
2321 while off < len(sCode):
2322 off = sCode.find('/', off);
2323 if off < 0 or off + 1 >= len(sCode):
2324 break;
2325
2326 if sCode[off + 1] == '/':
2327 # C++ comment.
2328 offEnd = sCode.find('\n', off + 2);
2329 if offEnd < 0:
2330 return sCode[:off].rstrip();
2331 sCode = sCode[ : off] + sCode[offEnd : ];
2332 off += 1;
2333
2334 elif sCode[off + 1] == '*':
2335 # C comment
2336 offEnd = sCode.find('*/', off + 2);
2337 if offEnd < 0:
2338 return sCode[:off].rstrip();
2339 sSep = ' ';
2340 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2341 sSep = '';
2342 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2343 off += len(sSep);
2344
2345 else:
2346 # Not a comment.
2347 off += 1;
2348 return sCode;
2349
2350 @staticmethod
2351 def extractParam(sCode, offParam):
2352 """
2353 Extracts the parameter value at offParam in sCode.
2354 Returns stripped value and the end offset of the terminating ',' or ')'.
2355 """
2356 # Extract it.
2357 cNesting = 0;
2358 offStart = offParam;
2359 while offParam < len(sCode):
2360 ch = sCode[offParam];
2361 if ch == '(':
2362 cNesting += 1;
2363 elif ch == ')':
2364 if cNesting == 0:
2365 break;
2366 cNesting -= 1;
2367 elif ch == ',' and cNesting == 0:
2368 break;
2369 offParam += 1;
2370 return (sCode[offStart : offParam].strip(), offParam);
2371
2372 @staticmethod
2373 def extractParams(sCode, offOpenParen):
2374 """
2375 Parses a parameter list.
2376 Returns the list of parameter values and the offset of the closing parentheses.
2377 Returns (None, len(sCode)) on if no closing parentheses was found.
2378 """
2379 assert sCode[offOpenParen] == '(';
2380 asParams = [];
2381 off = offOpenParen + 1;
2382 while off < len(sCode):
2383 ch = sCode[off];
2384 if ch.isspace():
2385 off += 1;
2386 elif ch != ')':
2387 (sParam, off) = McBlock.extractParam(sCode, off);
2388 asParams.append(sParam);
2389 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2390 if sCode[off] == ',':
2391 off += 1;
2392 else:
2393 return (asParams, off);
2394 return (None, off);
2395
2396 @staticmethod
2397 def findClosingBraces(sCode, off, offStop):
2398 """
2399 Finds the matching '}' for the '{' at off in sCode.
2400 Returns offset of the matching '}' on success, otherwise -1.
2401
2402 Note! Does not take comments into account.
2403 """
2404 cDepth = 1;
2405 off += 1;
2406 while off < offStop:
2407 offClose = sCode.find('}', off, offStop);
2408 if offClose < 0:
2409 break;
2410 cDepth += sCode.count('{', off, offClose);
2411 cDepth -= 1;
2412 if cDepth == 0:
2413 return offClose;
2414 off = offClose + 1;
2415 return -1;
2416
2417 @staticmethod
2418 def countSpacesAt(sCode, off, offStop):
2419 """ Returns the number of space characters at off in sCode. """
2420 offStart = off;
2421 while off < offStop and sCode[off].isspace():
2422 off += 1;
2423 return off - offStart;
2424
2425 @staticmethod
2426 def skipSpacesAt(sCode, off, offStop):
2427 """ Returns first offset at or after off for a non-space character. """
2428 return off + McBlock.countSpacesAt(sCode, off, offStop);
2429
2430 @staticmethod
2431 def isSubstrAt(sStr, off, sSubStr):
2432 """ Returns true of sSubStr is found at off in sStr. """
2433 return sStr[off : off + len(sSubStr)] == sSubStr;
2434
## Matches C/C++ control statement starts: 'if (', 'else', 'while (', 'for (' and 'do'.
koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches references to the 'iem.s.' members listed in the pattern.  Going by
## the name these are the decoder state variables - confirm against the users.
koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                               + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                               + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                               + r')');
2440
2441 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2442 """
2443 Decodes sRawCode[off : offStop].
2444
2445 Returns list of McStmt instances.
2446 Raises ParserException on failure.
2447 """
2448 if offStop < 0:
2449 offStop = len(sRawCode);
2450 aoStmts = [];
2451 while off < offStop:
2452 ch = sRawCode[off];
2453
2454 #
2455 # Skip spaces and comments.
2456 #
2457 if ch.isspace():
2458 off += 1;
2459
2460 elif ch == '/':
2461 ch = sRawCode[off + 1];
2462 if ch == '/': # C++ comment.
2463 off = sRawCode.find('\n', off + 2);
2464 if off < 0:
2465 break;
2466 off += 1;
2467 elif ch == '*': # C comment.
2468 off = sRawCode.find('*/', off + 2);
2469 if off < 0:
2470 break;
2471 off += 2;
2472 else:
2473 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2474
2475 #
2476 # Is it a MC statement.
2477 #
2478 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2479 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2480 # Extract it and strip comments from it.
2481 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2482 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2483 if offEnd <= off:
2484 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2485 else:
2486 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2487 if offEnd <= off:
2488 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2489 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2490 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2491 offEnd -= 1;
2492 while offEnd > off and sRawCode[offEnd - 1].isspace():
2493 offEnd -= 1;
2494
2495 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2496
2497 # Isolate the statement name.
2498 offOpenParen = sRawStmt.find('(');
2499 if offOpenParen < 0:
2500 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2501 sName = sRawStmt[: offOpenParen].strip();
2502
2503 # Extract the parameters.
2504 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2505 if asParams is None:
2506 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2507 if offCloseParen + 1 != len(sRawStmt):
2508 self.raiseDecodeError(sRawCode, off,
2509 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2510
2511 # Hand it to the handler.
2512 fnParser = g_dMcStmtParsers.get(sName);
2513 if not fnParser:
2514 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2515 fnParser = fnParser[0];
2516 oStmt = fnParser(self, sName, asParams);
2517 if not isinstance(oStmt, (list, tuple)):
2518 aoStmts.append(oStmt);
2519 else:
2520 aoStmts.extend(oStmt);
2521
2522 #
2523 # If conditional, we need to parse the whole statement.
2524 #
2525 # For reasons of simplicity, we assume the following structure
2526 # and parse each branch in a recursive call:
2527 # IEM_MC_IF_XXX() {
2528 # IEM_MC_WHATEVER();
2529 # } IEM_MC_ELSE() {
2530 # IEM_MC_WHATEVER();
2531 # } IEM_MC_ENDIF();
2532 #
2533 if sName.startswith('IEM_MC_IF_'):
2534 if iLevel > 1:
2535 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2536
2537 # Find start of the IF block:
2538 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2539 if sRawCode[offBlock1] != '{':
2540 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2541
2542 # Find the end of it.
2543 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2544 if offBlock1End < 0:
2545 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2546
2547 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2548
2549 # Is there an else section?
2550 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2551 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2552 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2553 if sRawCode[off] != '(':
2554 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2555 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2556 if sRawCode[off] != ')':
2557 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2558
2559 # Find start of the ELSE block.
2560 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2561 if sRawCode[offBlock2] != '{':
2562 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2563
2564 # Find the end of it.
2565 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2566 if offBlock2End < 0:
2567 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2568
2569 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2570 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2571
2572 # Parse past the endif statement.
2573 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2574 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2575 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2576 if sRawCode[off] != '(':
2577 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2578 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2579 if sRawCode[off] != ')':
2580 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2581 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2582 if sRawCode[off] != ';':
2583 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2584 off += 1;
2585
2586 else:
2587 # Advance.
2588 off = offEnd + 1;
2589
2590 #
2591 # Otherwise it must be a C/C++ statement of sorts.
2592 #
2593 else:
2594 # Find the end of the statement. if and else requires special handling.
2595 sCondExpr = None;
2596 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2597 if oMatch:
2598 if oMatch.group(1)[-1] == '(':
2599 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2600 else:
2601 offEnd = oMatch.end();
2602 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2603 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2604 elif ch == '#':
2605 offEnd = sRawCode.find('\n', off, offStop);
2606 if offEnd < 0:
2607 offEnd = offStop;
2608 offEnd -= 1;
2609 while offEnd > off and sRawCode[offEnd - 1].isspace():
2610 offEnd -= 1;
2611 else:
2612 offEnd = sRawCode.find(';', off);
2613 if offEnd < 0:
2614 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2615
2616 # Check this and the following statement whether it might have
2617 # something to do with decoding. This is a statement filter
2618 # criteria when generating the threaded functions blocks.
2619 offNextEnd = sRawCode.find(';', offEnd + 1);
2620 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2621 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2622 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2623 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2624 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2625 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2626 );
2627
2628 if not oMatch:
2629 if ch != '#':
2630 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2631 else:
2632 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2633 off = offEnd + 1;
2634 elif oMatch.group(1).startswith('if'):
2635 #
2636 # if () xxx [else yyy] statement.
2637 #
2638 oStmt = McCppCond(sCondExpr, fDecode);
2639 aoStmts.append(oStmt);
2640 off = offEnd + 1;
2641
2642 # Following the if () we can either have a {} containing zero or more statements
2643 # or we have a single statement.
2644 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2645 if sRawCode[offBlock1] == '{':
2646 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2647 if offBlock1End < 0:
2648 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2649 offBlock1 += 1;
2650 else:
2651 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2652 if offBlock1End < 0:
2653 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2654
2655 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2656
2657 # The else is optional and can likewise be followed by {} or a single statement.
2658 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2659 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2660 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2661 if sRawCode[offBlock2] == '{':
2662 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2663 if offBlock2End < 0:
2664 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2665 offBlock2 += 1;
2666 else:
2667 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2668 if offBlock2End < 0:
2669 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2670
2671 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2672 off = offBlock2End + 1;
2673
2674 elif oMatch.group(1) == 'else':
2675 # Problematic 'else' branch, typically involving #ifdefs.
2676 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2677
2678 return aoStmts;
2679
def decode(self):
    """
    Returns the decoded statement list of the block, decoding it on demand.

    The result of self.decodeCode() on the concatenated self.asLines is
    stored in self.aoStmts.  As long as self.aoStmts is falsy (None or an
    empty list) each call decodes the lines again.

    Returns the statement list.
    Raises ParserException on failure.
    """
    aoCached = self.aoStmts;
    if aoCached:
        return aoCached;

    sRawCode = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sRawCode);
    return self.aoStmts;
2689
2690
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks if iEffSeg is used before the effective address has been decoded.

    Only the top-level statements in aoStmts are examined; conditional
    branches are not entered, as we're ASSUMING iEffSeg references will not
    occur there before the address calculation.

    Returns None on success, error string on failure.

    See r158454 for an example of this issue.
    """
    sNeedle = 'pVCpu->iem.s.iEffSeg';

    # Find the first IEM_MC_CALC_RM_EFF_ADDR statement.  Without one there is
    # nothing to verify.
    idxCalc = next((idxCur for idxCur, oCur in enumerate(aoStmts) if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), None);
    if idxCalc is None:
        return None;

    # Scan backwards from the statement preceding the calculation, so the
    # offender closest to it is the one that gets reported (1-based number).
    for idxPrev in range(idxCalc - 1, -1, -1):
        if any(sNeedle in sParam for sParam in aoStmts[idxPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2712
# Matches the first word of a C/C++ statement: optional leading whitespace,
# the word itself (group 1), followed by a space, '(' or ';'.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C/C++ statements that checkForDoneDecoding
# tolerates after the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    (
        'IEMOP_HLP_IN_VMX_OPERATION',
        'IEMOP_HLP_VMX_INSTR',
    ),
    True
);
2718
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the block has exactly one IEMOP_HLP_DONE_*DECODING* (or
    IEMOP_HLP_DECODED_*) invocation and that it is correctly placed, i.e. no
    state modifying MC statement precedes it and no decoding statement
    follows it.  An IEM_MC_DEFER_TO_CIMPL_* statement in the first position
    counts as an implicit invocation.

    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    sFmtDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";
    sFmtModifyPriorDone = "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top-level statements are examined.
    cDoneSeen = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        iStmtNo = idxStmt + 1;

        #
        # MC statements.
        #
        if not oCur.isCppStmt():
            sMcName = oCur.sName;
            if idxStmt == 0 and sMcName.startswith('IEM_MC_DEFER_TO_CIMPL_'):
                cDoneSeen += 1; # implicit
            elif cDoneSeen == 0:
                # Column 1 of the parser table entry flags state modifying statements.
                if g_dMcStmtParsers.get(sMcName, (None, False))[1]:
                    return sFmtModifyPriorDone % (iStmtNo,);
            elif sMcName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return sFmtDecodeAfterDone % (iStmtNo,);
            continue;

        #
        # C/C++ statements: Classify by the first word, if we can find one.
        #
        oFirstWord = self.koReCppFirstWord.match(oCur.asParams[0]);
        if not oFirstWord:
            continue;
        sWord = oFirstWord.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_',)):
            cDoneSeen += 1;
        elif cDoneSeen > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return sFmtDecodeAfterDone % (iStmtNo,);

    if cDoneSeen == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneSeen == 1:
        return None;
    return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
2760
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.

    aoStmts is the statement list to check; conditional statements
    (McStmtCond) are entered recursively.  asRegRefClasses is a dictionary
    keyed by the register classes (e.g. 'GREG', 'EFLAGS', 'FPUREG') that
    have been referenced so far; it is updated in place and shared with the
    recursive calls for both the if and the else branch.

    Returns None on success, error string on failure.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            sName  = oStmt.sName;
            offRef = sName.find("_REF_");
            if offRef > 0:
                if sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                             'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                             'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    # Here _REF_ is followed by the type rather than the register class.
                    sClass = 'FPUREG';
                else:
                    # The register class is the word following _REF_.  When there
                    # is no further underscore (IEM_MC_REF_EFLAGS) it is the whole
                    # remainder of the name, not just its first character, as
                    # otherwise it can never match the class of a later
                    # IEM_MC_FETCH_EFLAGS.
                    sClass = sName[offRef + len("_REF_") : ].partition('_')[0];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = sName[offFetch + len("_FETCH_") : ];
                    # Memory fetches do not involve guest register shadowing.
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2816
def check(self):
    """
    Runs the sanity checks on the decoded block: too early iEffSeg use,
    IEMOP_HLP_DONE_*DECODING* placement and register fetch after reference.

    Every check is always run, one error string is collected per failing
    check, in the order listed above.

    Returns error string list, empty if all is fine.
    """
    aoDecoded = self.decode();
    asResults = (
        self.checkForTooEarlyEffSegUse(aoDecoded),
        self.checkForDoneDecoding(aoDecoded),
        self.checkForFetchAfterRef(aoDecoded, {}),
    );
    return [sError for sError in asResults if sError];
2838
2839
2840
2841## IEM_MC_XXX -> parser + info dictionary.
2842#
2843# The info columns:
2844# - col 1+0: boolean entry indicating whether the statement modifies state and
2845# must not be used before IEMOP_HLP_DONE_*.
2846# - col 1+1: boolean entry similar to the previous column, but it is
2847# used to decide when to emit calls for conditional jumps (Jmp/NoJmp).
2848# The difference is that most IEM_MC_IF_XXX entries are False here.
2849# - col 1+2: boolean entry indicating native recompiler support.
2850#
2851# The raw table was generated via the following command
2852# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2853# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2854# pylint: disable=line-too-long
2855g_dMcStmtParsers = {
2856 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2857 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2858 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2859 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2860 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2861 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2862 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2863 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2864 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2865 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2866 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2867 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2868 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2869 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2870 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2871 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2872 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2873 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2874 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2875 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2876 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2877 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2878 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2879 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2880 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2881 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
2882 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, False, ),
2883 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, False, ),
2884 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
2885 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2886 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2887 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2888 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2889 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2890 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2891 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2892 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2893 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2894 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2895 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2896 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2897 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2898 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2899 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2900 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
2901 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, False, ),
2902 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, False, ),
2903 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2904 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2905 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2906 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2907 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2908 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2909 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2910 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2911 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2912 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2913 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2914 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2915 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2916 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2917 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2918 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2919 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2920 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2921 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2922 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2923 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2924 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2925 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2926 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2927 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
2928 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2929 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, False, ),
2930 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, False, ),
2931 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
2932 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
2933 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2934 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2935 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2936 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2937 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2938 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2939 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2940 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
2941 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
2942 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
2943 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, False, ),
2944 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, False, ),
2945 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
2946 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
2947 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2948 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
2949 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2950 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
2951 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2952 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2953 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
2954 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2955 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2956 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2957 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2958 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2959 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2960 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2961 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2962 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
2963 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
2964 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
2965 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
2967 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
2968 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
2969 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
2970 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
2971 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
2972 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
2973 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
2974 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
2975 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
2976 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
2977 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
2978 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
2979 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2980 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2981 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2982 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2983 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
2984 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
2985 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
2986 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
2987 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
2988 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2989 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2990 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
2991 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
2992 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
2993 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2994 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2995 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2996 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2997 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2998 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2999 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3000 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3001 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3002 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3003 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3004 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3005 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3006 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3007 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3008 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3009 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3010 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3011 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3012 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3013 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3014 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3015 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3016 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3017 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3018 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3019 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3020 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3021 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3022 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3023 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, False, ),
3024 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3025 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3026 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3027 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3028 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3029 'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False, False, False, ),
3030 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3031 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, False, ),
3032 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3033 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3034 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3035 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3036 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3037 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3038 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3039 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3045 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3046 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3047 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3048 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3049 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3050 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3051 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3052 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3053 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3054 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3055 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3056 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3057 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3058 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3059 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3060 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3061 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3062 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3063 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3064 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3065 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3066 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3067 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3068 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3069 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3070 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3071 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, False, ),
3072 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, False, ),
3073 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3074 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3075 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3076 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3077 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3078 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3079 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3080 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3081 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3082 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3083 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3084 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3085 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3086 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3087 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3088 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3089 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3090 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3091 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3092 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3093 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3094 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3095 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3096 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3097 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3098 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3099 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3100 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3101 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3102 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3103 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3104 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3105 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3106 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3107 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3108 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3109 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3110 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3111 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3112 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3113 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3114 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3115 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3116 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3117 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3118 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3119 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3120 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3121 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3122 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3123 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3124 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3125 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3126 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3127 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3128 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3129 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3130 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3131 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3132 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3133 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3134 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3135 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3136 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3137 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3138 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3139 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3140 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3141 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, False, ),
3142 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3143 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3144 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3145 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3146 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3147 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3148 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3149 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3150 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3151 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3152 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3153 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3154 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3155 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3156 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, False, ),
3157 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3158 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3159 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3160 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3161 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3162 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3163 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3164 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3165 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3166 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3167 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3168 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3169 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3170 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3171 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3172 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3173 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3174 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3175 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3176 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3177 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, False, ),
3178 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3179 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3180 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3181 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3182 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3183 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3184 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3185 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3186 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3187 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3188 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3189 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3190 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3191 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3192 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3193 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3194 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3195 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3196 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3197 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3198 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3199 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3200 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3201 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3202 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3203 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3204 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3205 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3206 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3207 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3208 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3209 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3210 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3211 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3214 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3215 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3216 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3217 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3218 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3219 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3220 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3221 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3222 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3223 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3224 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3225 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3226 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3227 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3228 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3229 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3230 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3231 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3232 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3233 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3234 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3235 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3236 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3237 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3238 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3239 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3240 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3241 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3242 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3243 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3244 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3245 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3246 'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True, True, False, ),
3247 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3248 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3249 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3250 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3251 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3252 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3253 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3254 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3255 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3256 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3257 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3258 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3259 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3260 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3261 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3262 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3263 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3264 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3265 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3266 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3267 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3268 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3269 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3270 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3271 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3272 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3273 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3274 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3275 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3276 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3277};
3278# pylint: enable=line-too-long
3279
## List of microcode blocks (McBlock instances, going by the type comment).
# Starts out empty; NOTE(review): presumably appended to by the parser as
# IEM_MC_BEGIN/IEM_MC_END blocks are found - confirm against the code doing it.
g_aoMcBlocks = [] # type: List[McBlock]
3282
3283
3284
class ParserException(Exception):
    """
    Exception type raised by the parser for fatal errors.

    Carries nothing but the message it was constructed with.
    """

    def __init__(self, sMessage):
        # Two-argument super() keeps this working on python 2 as well.
        super(ParserException, self).__init__(sMessage);
3289
3290
3291class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3292 """
3293 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3294 """
3295
3296 ## @name Parser state.
3297 ## @{
3298 kiCode = 0;
3299 kiCommentMulti = 1;
3300 ## @}
3301
class Macro(object):
    """
    A preprocessor macro: name, optional parameter list and body text.

    The class can expand its own body for a given list of argument values,
    see expandMacro().
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body text that gets expanded.
        self.iLine  = iLine;    ##< Line number supplied by the creator (presumably where the macro is defined).
        ## Matches one parameter name in the body, together with any '##'
        #  token pasting operator (and whitespace around it) on either side.
        #  Group 1 & 3 are the '##' bits (empty when it was a plain word
        #  boundary), group 2 is the parameter name.  None if no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): As written this is the same as ch.isspace(), since a
        #               whitespace character can never be '('.  The intent may
        #               have been different; behaviour deliberately unchanged.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must have one value per macro parameter
        (asserted), or be None/empty for a macro without parameters.

        Returns the expanded body string.  Raises KeyError if the parameter
        regular expression matches something that isn't a parameter name.
        """
        _ = oParent;
        sBody = self.sBody;

        # Simple macro (or empty parameter list): Nothing to substitute.
        if not self.oReArgMatch:
            assert not asArgs;
            return sBody;

        assert len(asArgs) == len(self.asArgs);
        #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
        dArgs = dict(zip(self.asArgs, asArgs));

        #
        # Do the substitution in a single left-to-right pass over the
        # *original* body.  Searching the partially expanded text instead
        # makes the word boundary check in front of the next parameter look
        # at the tail of the value just inserted, so that 'a ## b' with the
        # values 'foo' and 'bar' came out as 'foob' rather than 'foobar'.
        # The inserted values are never rescanned either way.
        #
        asPieces = [];
        offPrev  = 0;
        for oMatch in self.oReArgMatch.finditer(sBody):
            #oParent.debug('%s %s..%s (%s)' % (oMatch.group(2), oMatch.start(), oMatch.end(), oMatch.group()));
            sValue = dArgs[oMatch.group(2)];
            asPieces.append(sBody[offPrev : oMatch.start()]);

            # The padding rules are the same as before (only considered when
            # there was no '##' on the side in question).
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                asPieces.append(' ');
            asPieces.append(sValue);
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                asPieces.append(' ');

            offPrev = oMatch.end();
        asPieces.append(sBody[offPrev:]);

        return ''.join(asPieces);
3343
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    The expression/define is validated on construction, so unsupported input
    is caught early (an Exception is raised).  The aoElif list and fInElse
    flag are not modified by this class itself; they are read by
    isInBlockForArch() and are expected to be maintained by whoever owns the
    conditional stack.

    Helpers referring to class data are classmethods so the class does not
    depend on the name of the enclosing class; they can be called via the
    class or an instance just like the static methods they replace.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Translates the architecture name given to the script (sArch) into the
    #  corresponding RT_ARCH_XXX define.  Covers every RT_ARCH_XXX entry in
    #  kdKnownDefines (sparc32 used to be mapped to RT_ARCH_SPARC64 and x86,
    #  arm32 and sparc64 were missing, the latter causing a KeyError).
    kdBuildArchToIprt = {
        'x86':      'RT_ARCH_X86',
        'amd64':    'RT_ARCH_AMD64',
        'arm32':    'RT_ARCH_ARM32',
        'arm64':    'RT_ARCH_ARM64',
        'sparc32':  'RT_ARCH_SPARC',
        'sparc64':  'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive without the hash ('if' and 'elif' take an
        expression, everything else is treated as taking a single define).
        Raises Exception if the expression or define isn't supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Flag (was initialized to an empty list, which only worked by being falsy).
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr);

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """ Checks that sDefine is one that we support. Raises exception if unsupported. """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in cls.kdKnownDefines:
            return True;
        # Header guards are fine too.
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported. Raises exception if not.

        Supported: '0', '1', and defined(xxx) terms combined using '!', '||',
        '&&' and parentheses, optionally ending with a literal 1 term.
        Returns True.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # Trailing literal '1': Consume it.  (There is no match
                    # object in this case, so oMatch.end() must not be used.)
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True/False.  Raises Exception for anything but the simple
        expression forms described in the body.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off): # Must check at 'off', not at the start of the expression.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so a trailing literal '1' term is recognized above.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch isn't in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not in kdKnownDefines are treated as undefined.  Raises
        Exception for defines marked as unsupported for filtering (-2).
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        aoCppCondStack is the stack of conditionals currently open; each of
        them must put us in a branch that applies to sArch.  iLine is only
        for debugging.
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block applies, so we must still be inside it.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                # An applicable #elif only counts if it is the last one seen
                # and no #else has followed it.
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                # Nothing applied so far: Only okay when in the #else part.
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3534
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Sets up the parser state for one source file.

    sSrcFile is the name used as prefix in error messages and asLines the
    lines of that file.  sDefaultMap must be a key in g_dInstructionMaps
    (asserted).  sHostArch is just stored.  If oInheritMacrosFrom is given,
    its macro dictionary is copied and its macro regular expression shared.
    """
    #
    # Input and basic parsing state.
    #
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock

    #
    # Macros.  Start out empty unless there is someone to inherit from, in
    # which case we get our own copy of the dictionary.
    #
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    else:
        self.dMacros    = {}    # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = None  # type: re      ##< Regular expression matching invocations of anything in self.dMacros.

    self.aoCppCondStack = []    # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
    self.sHostArch      = sHostArch;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    #
    # Statistics.
    #
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    #
    # Regular expressions.  Macro names, mnemonics and stats names are all
    # plain identifiers.
    #
    sReIdentifier        = r'^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName    = re.compile(sReIdentifier);
    self.oReMnemonic     = re.compile(sReIdentifier);
    self.oReStatsName    = re.compile(sReIdentifier);
    self.oReFunctionName = re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName    = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum      = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable     = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment      = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2  = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3  = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReMcBeginEnd   = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\(');      ##< Not DEFER_TO_CIMPL_0_RET!

    #
    # Debug switches.
    #
    self.fDebug          = True;
    self.fDebugMc        = False;
    self.fDebugPreproc   = False;

    #
    # The tag handlers, listed in (tag, method) pairs.
    #
    atTagHandlers = (
        ( '@opbrief',       self.parseTagOpBrief, ),
        ( '@opdesc',        self.parseTagOpDesc, ),
        ( '@opmnemonic',    self.parseTagOpMnemonic, ),
        ( '@op1',           self.parseTagOpOperandN, ),
        ( '@op2',           self.parseTagOpOperandN, ),
        ( '@op3',           self.parseTagOpOperandN, ),
        ( '@op4',           self.parseTagOpOperandN, ),
        ( '@oppfx',         self.parseTagOpPfx, ),
        ( '@opmaps',        self.parseTagOpMaps, ),
        ( '@opcode',        self.parseTagOpcode, ),
        ( '@opcodesub',     self.parseTagOpcodeSub, ),
        ( '@openc',         self.parseTagOpEnc, ),
        # @opfltest: Lists all flags that will be used as input in some way.
        ( '@opfltest',      self.parseTagOpEFlags, ),
        # @opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflclear and @opflundef (if applicable).
        ( '@opflmodify',    self.parseTagOpEFlags, ),
        # @opflset: Lists all flags that will be set (set to 1).
        ( '@opflset',       self.parseTagOpEFlags, ),
        # @opflclear: Lists all flags that will be cleared (set to 0).
        ( '@opflclear',     self.parseTagOpEFlags, ),
        # @opflundef: List of flag documented as undefined.
        ( '@opflundef',     self.parseTagOpEFlags, ),
        # @opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        ( '@opflclass',     self.parseTagOpEFlagsClass, ),
        ( '@ophints',       self.parseTagOpHints, ),
        ( '@opdisenum',     self.parseTagOpDisEnum, ),
        ( '@opmincpu',      self.parseTagOpMinCpu, ),
        ( '@opcpuid',       self.parseTagOpCpuId, ),
        ( '@opgroup',       self.parseTagOpGroup, ),
        ( '@opunused',      self.parseTagOpUnusedInvalid, ),
        ( '@opinvalid',     self.parseTagOpUnusedInvalid, ),
        ( '@opinvlstyle',   self.parseTagOpUnusedInvalid, ),
        ( '@optest',        self.parseTagOpTest, ),
        ( '@optestign',     self.parseTagOpTestIgnore, ),
        ( '@optestignore',  self.parseTagOpTestIgnore, ),
        ( '@opcopytests',   self.parseTagOpCopyTests, ),
        ( '@oponly',        self.parseTagOpOnlyTest, ),
        ( '@oponlytest',    self.parseTagOpOnlyTest, ),
        ( '@opxcpttype',    self.parseTagOpXcptType, ),
        ( '@opstats',       self.parseTagOpStats, ),
        ( '@opfunction',    self.parseTagOpFunction, ),
        ( '@opdone',        self.parseTagOpDone, ),
    );
    self.dTagHandlers = dict(atTagHandlers);

    # Numbered test tags come in two flavours: @optestN and @optest[N].
    for iTest in range(48):
        for sTag in ('@optest%u' % (iTest,), '@optest[%u]' % (iTest,)):
            self.dTagHandlers[sTag] = self.parseTagOpTestNum;

    # Errors collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
3626
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number (self.iLine).  Does not return.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3632
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the line number reported is that of the
    current comment (self.iCommentLine) plus iLineInComment.
    """
    iErrorLine   = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,);
    raise ParserException(sFullMessage);
3638
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors
    without raising anything.

    Always returns False, so it can be used directly in a return statement.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3646
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number (rather than the current one)
    in self.asErrors without raising anything.

    Always returns False, so it can be used directly in a return statement.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3654
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the line number
    reported is self.iCommentLine + iLineInComment.

    Always returns False, so it can be used directly in a return statement.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry     = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3662
def printErrors(self):
    """
    Writes all the recorded errors to stderr in one go (nothing is written
    when there are none).

    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3671
def debug(self, sMessage):
    """
    Debug output: Prints sMessage with a 'debug: ' prefix to stderr, but only
    when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3678
def stripComments(self, sLine):
    """
    Replaces each comment matched by self.oReComment in sLine with a single
    space and returns the result.

    If any comment start or end marker is left afterwards, an error is
    recorded via self.error(); the (partially) stripped line is returned
    regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3689
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line starting the table; the table name is the identifier
    in front of the first '[' on it.  The table body is read from
    self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the number of entries
    matches the expected table length.  In all other cases the result of
    self.error() / self.errorOnLine() is returned (presumably False).

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    # NOTE(review): re.search() returns None if sLine holds no 'name[' - this
    #               assumes the caller only passes table start lines.
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    # A table explicitly dimensioned [256] or [32] has one entry per byte and
    # no prefix information.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # The list is walked backwards so inserting and deleting entries
        # doesn't disturb the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # Three insertions + the asEntries[i] assignment below = four copies.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            # The closing brace ends the table; check the entry count.
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Find an instruction that matches this slot, filling in a
                    # missing prefix or missing opcode + prefix on the way.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # No match: register a copy of the first instruction for
                    # this map, opcode and prefix.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries are skipped above but still occupy a table slot.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3795
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and registers it.

    The instruction is created for self.sSrcFile and iLine, with self.iLine
    being used when iLine is None.  It is appended to both the global
    g_aoAllInstructions list and the self.aoCurInstrs list.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3804
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise the
    part of sStats before the first underscore, in lower case, becomes the
    mnemonic.  If the instruction has no operands yet, the remaining
    underscore separated parts are looked up in g_kdOpTypes and added as
    operands.

    Returns False if an unknown operand type is encountered (operands added
    before it are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asTypes = asParts[1:];
    if not asTypes or oInstr.aoOperands:
        return True;

    for sType in asTypes:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3820
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    oInstr is the instruction to complete and iLine the line number recorded
    as its completion line.  Missing attributes (mnemonic, disassembler enum,
    stats name, function name, maps and encoding) are derived from those that
    are present, and the instruction is then added to its maps and to the
    global stats and function indexes.

    Must only be called once per instruction (asserted).  Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    # (Placeholder: nothing is currently done here.)
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    # (Placeholder: nothing is currently done here either.)
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name without its 'iemOp_' prefix is used when there is no stats name.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    # The function name takes precedence over the mnemonic when both are known.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    # This must come after the stats derivation above, which checks whether
    # sFunction is still None.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    # Note! No encoding is derived when there are operands and the first one
    #       doesn't use ModR/M.
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
               or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    # A duplicate is reported as an error and the first instruction is kept.
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3926
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with the current instruction(s).

    Each instruction in self.aoCurInstrs is completed via doneInstructionOne,
    using self.iLine as the completion line, or self.iCommentLine +
    iLineInComment when iLineInComment is given.  The stub and instruction
    totals are updated, and the current comment and instruction list are
    reset.

    When fEndOfFunction is set, the current function (if any) is completed
    with the lines from its beginning up to self.iLine and then cleared, and
    self.iMcBlockInFunc is reset to zero.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment = '';
    self.aoCurInstrs = [];
    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    if self.oCurFunction:
        iFirst = self.oCurFunction.iBeginLine - 1;
        self.oCurFunction.complete(self.iLine, self.asLines[iFirst : self.iLine]);
        self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
3947
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all the current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced; any other existing value is left untouched.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3959
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib to oValue for all the
    current instructions.

    Arrays that are too short are first padded with None entries so that
    iEntry exists.  Unless fOverwrite is True, only entries that are
    currently None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
3971
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line of asLines starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, the words following 'Opcode' are joined
    by single spaces, with any '0X' prefix normalized to '0x', and set as the
    'sRawOldOpcodes' attribute of the current instructions using
    setInstrunctionAttrib.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and (   asWords[1].startswith('0x')
            or asWords[1].startswith('0X')):
        # Drop the leading 'Opcode' word and keep everything following it.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3987
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added for line
    self.iCommentLine + iTagLine.  The op-tag count of every current
    instruction is incremented, and self.cTotalTagged is incremented for each
    instruction getting its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3997
3998 @staticmethod
3999 def flattenSections(aasSections):
4000 """
4001 Flattens multiline sections into stripped single strings.
4002 Returns list of strings, on section per string.
4003 """
4004 asRet = [];
4005 for asLines in aasSections:
4006 if asLines:
4007 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
4008 return asRet;
4009
4010 @staticmethod
4011 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
4012 """
4013 Flattens sections into a simple stripped string with newlines as
4014 section breaks. The final section does not sport a trailing newline.
4015 """
4016 # Typical: One section with a single line.
4017 if len(aasSections) == 1 and len(aasSections[0]) == 1:
4018 return aasSections[0][0].strip();
4019
4020 sRet = '';
4021 for iSection, asLines in enumerate(aasSections):
4022 if asLines:
4023 if iSection > 0:
4024 sRet += sSectionSep;
4025 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
4026 return sRet;
4027
4028
4029
4030 ## @name Tag parsers
4031 ## @{
4032
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The flattened value gets a trailing period if it lacks one, may not exceed
    180 characters and must consist of a single sentence.  It is stored in
    oInstr.sBrief, which must not have been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first period followed by a space (or the final one); periods
    # inside words (like in '0.5') do not end the sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4062
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are appended
    to the asDescSections list of the instruction.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
4077
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The value must match self.oReMnemonic and the instruction must not
    already have a mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        return self.errorComment(iTagLine, '%s: invalid mnemonic name: "%s"' % (sTag, sMnemonic,));
    if oInstr.sMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite mnemonic "%s" with "%s"'
                                           % (sTag, oInstr.sMnemonic, sMnemonic,));
    oInstr.sMnemonic = sMnemonic;

    _ = iEndLine;
    return True;
4100
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  When
    omitted, the location is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that is already set will not be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4150
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Whitespace around the map names is ignored and empty list entries are
    dropped.  Each name must be a key in g_dInstructionMaps and must not
    already be assigned to the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # A name repeated within the value itself; reported, but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4185
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    Exactly one value must be given.  It is lower-cased, 'n/a' translates to
    None, and a two character value gets '0x' prepended.  Values other than
    None must be present in g_kdPrefixes.  An existing prefix will not be
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            if sPrefix != '!0xf3':
                return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4224
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg  (TODO: | mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The /reg forms take a single digit in the range 0 thru 7, as the ModR/M
    reg field is three bits wide.  Anything else must be accepted by
    _isValidOpcodeByte.  An existing opcode will not be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fRegForm = False;
    for sLead in ('/', '11/', '!11/'):
        if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
            fRegForm = True;
            break;
    if not fRegForm and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4254
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; the first element of the
    associated entry is what gets stored in oInstr.sSubOpcode.  An existing
    value will not be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub opcode table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sValue, ', '.join(sorted(g_kdSubOpcodes.keys())),));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Set it, unless that means overwriting an earlier value.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
4284
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or accepted by _isValidOpcodeByte.  An existing
    encoding will not be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fValid =    sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, unless that means overwriting an earlier value.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
4311
## EFlags tag to Instruction attribute name.
## The keys are the EFLAGS tag names and the values the names of the
## instruction attributes; parseTagOpEFlags indexes this by its sTag argument.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
4320
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags from g_kdEFlagsMnemonics
    (surrounding whitespace is tolerated), or 'none' for an empty list.  The
    list is stored in the instruction attribute that self.kdOpFlagToAttr gives
    for the tag, provided that attribute is None or empty.

    Returns True on success and False on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                # Keep going so that all the invalid values get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, unless that means overwriting an earlier non-empty list.
    asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
4352
## EFLAGS class definitions with their attribute lists.
## Each class name maps to a dictionary which keys are instruction attribute
## names (asFlTest, asFlModify, asFlClear, asFlSet, asFlUndefined) and which
## values are the lists of lower case flag names to assign to them.
## parseTagOpEFlagsClass looks up the @opflclass value here.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'incdec': {
        'asFlTest': [],
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear': [], # between IMUL and MUL.
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', 'of', ],
    },
    'bitmap': { # bt, bts, btr, btc
        'asFlTest': [],
        'asFlModify': [ 'cf', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': { # no flags tested, modified or left undefined
        'asFlTest': [],
        'asFlModify': [],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    The value must be a key in self.kdEFlagsClasses.  Each attribute listed
    for the class is set on the instruction, which requires that all of them
    are still None.  The instruction gets its own copies of the flag lists, so
    the class definitions are never shared with (or modified via) instructions.

    Returns True on success, otherwise the result of self.errorComment().  On
    failure none of the instruction attributes are modified.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s  (valid: %s)'
                                           % (sTag, sClass, ', '.join(sorted(self.kdEFlagsClasses.keys())),));

    # Check all the attributes before setting any of them, so a conflict
    # doesn't leave the instruction partially updated.
    for sAttrib, asFlags in kdAttribs.items():
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));

    # Set the attributes.
    for sAttrib, asFlags in kdAttribs.items():
        setattr(oInstr, sAttrib, list(asFlags));

    _ = iEndLine;
    return True;
4479
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each value must be a key in g_kdHints, or the value may be a lone 'none'
    for no hints.  Valid hints are added to the oInstr.dHints dictionary;
    hints that are already present are reported as errors but are not fatal.

    Returns True on success and False on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() without a separator never returns values with leading or
    #       trailing whitespace, so no further stripping is necessary.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                # Keep going so that all the invalid values get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4513
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.

    An error is reported unless exactly one word is given; when several are
    given, processing continues with the first one.  The word must match
    self.oReDisEnum, and an existing oInstr.sDisEnum will not be overwritten.

    Returns True on success and False on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it, unless that means overwriting an earlier value.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
4542
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    An error is reported unless exactly one name is given; when several are
    given, processing continues with the first one.  The name must be present
    in g_kdCpuNames.  Attempting to replace an already set oInstr.sMinCpu by a
    different name is reported as an error and the old value is kept.

    Returns False if the value is missing or isn't a valid CPU name,
    otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if len(asCpus) != 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
        if not asCpus:
            return False;

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4572
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key in g_kdCpuIdFlags, or a lone 'none' for no requirements.  Valid
    entries are appended to oInstr.asCpuIds; entries that are already present
    are reported as errors but are not fatal.

    Returns True on success and False on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() without a separator never returns values with leading or
    #       trailing whitespace, so no further stripping is necessary.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                # Keep going so that all the invalid values get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4606
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name must be given and it must match
    self.oReGroupName.  An existing oInstr.sGroup will not be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and check that there is exactly one valid name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Set it, unless that means overwriting an earlier value.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
4632
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opunused, @opinvalid, @opinvlstyle
    Value:      <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be exactly one word that is a key of g_kdInvalidStyles,
    and only one of these tags may be given per instruction.  Violations
    are reported through errorComment(), whose return value is returned.
    On success oInstr.sInvalidStyle is set, oInstr.fUnused or
    oInstr.fInvalid is raised for @opunused / @opinvalid respectively, and
    True is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behaviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        # List the valid styles sorted rather than as a raw dict view, so the
        # message is stable and readable (same as the @opxcpttype handler).
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, sorted(g_kdInvalidStyles.keys()),));
    # Set it.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
4670
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  The words are
    scanned from the end: everything after '->' is an output, everything
    between '/' and '->' is an input, and everything before '/' is a
    selector.  Tests that parse cleanly are appended to oInstr.aoTests;
    problems are reported through errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list must be compared by
            # identity: comparing by value treats two lists with equal content
            # (e.g. both still empty) as the same list, which lets a repeated
            # or misplaced separator slip through.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An explicit ':type' suffix overrides the field's default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test only if everything parsed; otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4815
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag. Either @optest42 or @optest[42].

    The number embedded in the tag is compared with the count of tests the
    instruction already has; a mismatch is reported through errorComment().
    The tag is then handed to parseTagOpTest() and its result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Skip the '@optest[' (8 chars) or '@optest' (7 chars) prefix to get at the digits.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4831
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    A simple trick for disabling a test while debugging another one: the
    tag and its value are accepted and dropped.  Always returns True.

    See also @oponlytest.
    """
    # Nothing to do; just reference the arguments so they count as used.
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
4843
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Every word of the value must match self.oReStatsName or
    self.oReFunctionName and is then queued in oInstr.asCopyTests.
    Duplicates and malformed references are reported through
    errorComment() and skipped.  An empty value is an error and the
    errorComment() result is returned; otherwise True is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only record the references here.  Per the original note, the copying is
    # executed after all input has been parsed so forward references work.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4872
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value; a non-blank value is reported through
    errorComment() and that result is returned.  Otherwise the instruction
    is added to g_aoOnlyTestInstructions (once) and True is returned.

    See also @optestignore.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Anything besides whitespace is a mistake.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction unless it is already on the list.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);

    _ = iEndLine;
    return True;
4895
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word that is a key of g_kdXcptTypes, and
    the tag may only be given once per instruction.  Violations are
    reported through errorComment(), whose return value is returned.
    Returns True once oInstr.sXcptType has been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Split the flattened value into words; there must be precisely one.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));

    # It has to be one of the known exception types.
    sNewType = asWords[0];
    if sNewType not in g_kdXcptTypes:
        sMsg = '%s: invalid invalid exception type: %s (valid: %s)' % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),);
        return self.errorComment(iTagLine, sMsg);

    # No overwriting of an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        sMsg = '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)' % ( sTag, oInstr.sXcptType, sNewType,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sXcptType = sNewType;
    _ = iEndLine;
    return True;
4922
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReFunctionName and the instruction
    must not have a function name yet.  Violations are reported through
    errorComment(), whose return value is returned.  Returns True once
    oInstr.sFunction has been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the candidate name.
    sNewFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewFunction):
        sMsg = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sNewFunction, self.oReFunctionName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # An already assigned name is never replaced.
    if oInstr.sFunction is not None:
        sMsg = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sNewFunction,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sFunction = sNewFunction;
    _ = iEndLine;
    return True;
4950
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReStatsName and the instruction
    must not have a statistics name yet.  Violations are reported through
    errorComment(), whose return value is returned.  Returns True once
    oInstr.sStats has been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the candidate name.
    sNewStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewStats):
        sMsg = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sNewStats, self.oReStatsName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # An already assigned name is never replaced.
    if oInstr.sStats is not None:
        sMsg = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sNewStats,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sStats = sNewStats;
    _ = iEndLine;
    return True;
4978
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    A non-empty value is reported through errorComment() and that result is
    returned; otherwise the result of doneInstructions() is returned.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue != '':
        sMsg = '%s: takes no value, found: "%s"' % (sTag, sValue,);
        return self.errorComment(iTagLine, sMsg);
    return self.doneInstructions();
4991
4992 ## @}
4993
4994
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away
    (returns False).  Otherwise the comment is split into lines, an old
    style 'Opcode ...' first line is handed to parseCommentOldOpcode(), and
    the text is cut up into '@tag' chunks which are dispatched through
    self.dTagHandlers.  Returns True for every comment that was examined.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        # Keep the comment start line in sync with what we drop at the top.
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text before the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text extends the current section; a blank line after
            # some text starts a new section for the same tag.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag.
        #
        if not asCurSection and len(aasSections) > 1:
            aasSections.pop(-1);    # drop the trailing empty section
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # New tag: first word is the (lower cased) tag, the remainder of the
        # line, if any, opens its first section.
        #
        asSplit      = sLine.split(None, 1);
        sCurTag      = asSplit[0].lower();
        asCurSection = asSplit[1:];
        aasSections  = [asCurSection, ];
        iCurTagLine  = iLine;

    #
    # Process the final tag.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5095
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).

    Lines are pulled from self.asLines, starting at index self.iLine, for
    as long as the closing parenthesis has not been found.  Commas and
    parentheses inside single or double quoted text do not count.
    """
    # First the name: whatever precedes the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Then the arguments, one character at a time.
    iNextLine    = self.iLine;
    cNesting     = 1;
    offCur       = offParen + 1;
    offArgStart  = offCur;
    offLastLine  = 0;       # where the most recently appended line starts
    chOpenQuote  = None;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Ran out of text, so tack on the next source line.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asResult, offCur - offLastLine, iNextLine - self.iLine);
            offLastLine  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chOpenQuote:
            # Inside a quoted string: honour backslash escapes and look for the end.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chOpenQuote:
                chOpenQuote = None;
        elif chCur in ('"', '\'',):
            chOpenQuote = chCur;
        elif chCur in (',', ')',):
            # Only separators at the outermost level terminate an argument.
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        elif chCur == '(':
            cNesting += 1;
        offCur += 1;

    return (asResult, offCur - offLastLine, iNextLine - self.iLine);
5152
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and, if the next non-blank character after it is an opening parenthesis,
    parses the invocation.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    if offHit >= 0:
        # Slice rather than index the first character so that a macro name
        # sitting at the very end of sCode (nothing but whitespace after it)
        # counts as 'not found' instead of raising IndexError.
        sFollowing = sCode[offHit + len(sMacro):].lstrip();
        if sFollowing[:1] == '(':
            return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5162
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that only
    hands back the first element of its result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
5168
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    for which findAndParseMacroInvocation() finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
5178
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what the @op* tags have
    already set on the last instruction in self.aoCurInstrs (one is added if
    the list is empty), and fields that are still unset (mnemonic, operands,
    encoding, stats name) are filled in from the macro.  The hints from
    sDisHints and sIemHints are merged into oInstr.dHints.  Problems are
    reported through self.error().

    Nothing further is done when sIemHints contains IEMOPHINT_SKIP_PYTHON.
    sAsm is currently unused.  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # A 'V' operand needs a VEX or XOP form.  Test with 'in': str.find()
                # returns -1 (which is truthy) on a miss and 0 (which is falsy) for a
                # match at the start of the string, so its result must not be used
                # as a boolean nor be compared with '> 0' for a prefix.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not fVexOrXopForm
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those the form describes must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub.  Done for known forms only, as it looks sForm up
        # in g_kdIemForms.
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint arguments are '|' separated lists of prefixed constants, or '0'.
    for sHints, sPrefix, sParam in ( (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
5318
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    Derives the stats name and the assembly string from the lower case
    mnemonic and the operand list, and then defers to
    workerIemOpMnemonicEx(), returning its result.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # Without operands both are simply the mnemonic.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5328
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock and makes it the current one (self.oCurMcBlock).
    The block is added to g_aoMcBlocks, and self.cTotalMcBlocks bumped, when
    there are no preprocessor conditionals on the stack, no host
    architecture is set, or the conditionals match the host architecture.
    Raises an error (self.raiseError) if there is no current function or a
    previous block is still open.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded
    # multiline macros: the indent is the distance from the character after
    # the preceding newline.  rfind() yields -1 when there is no newline,
    # which makes the line start come out as offset zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    # But don't add it to the list unless the context matches the host architecture.
    oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = oInstruction, cchIndent = cchIndent);
    try:
        fWanted = (   not self.aoCppCondStack
                   or not self.sHostArch
                   or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine));
        if fWanted:
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    self.iMcBlockInFunc += 1;
    return True;
5366
5367 @staticmethod
5368 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
5369 """
5370 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
5371 extracting a statement block from a string that's the result of macro
5372 expansion and therefore contains multiple "sub-lines" as it were.
5373
5374 Returns list of lines covering offBegin thru offEnd in sRawLine.
5375 """
5376
5377 off = sRawLine.find('\n', offEnd);
5378 if off > 0:
5379 sRawLine = sRawLine[:off + 1];
5380
5381 off = sRawLine.rfind('\n', 0, offBegin) + 1;
5382 sRawLine = sRawLine[off:];
5383 if not sRawLine.strip().startswith(sBeginStmt):
5384 sRawLine = sRawLine[offBegin - off:]
5385
5386 return [sLine + '\n' for sLine in sRawLine.split('\n')];
5387
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current block (self.oCurMcBlock), cuts
    off whatever follows the 'IEM_MC_END();' statement on its last line,
    hands the lines to McBlock.complete() and clears self.oCurMcBlock.

    Raises an error (self.raiseError) if there is no current block, if
    IEM_MC_BEGIN is not the first word on its line, or if the last line
    does not contain a well formed 'IEM_MC_END ( ) ;'.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so add the
                # length of all the lines preceding it in the joined string.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines and asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1] if asLines else '';
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # whitespace.  The offset is bounds checked so that a truncated statement
    # gives a proper error rather than an IndexError.
    for chExpected in '();':
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5466
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is wrapped up as a McBlock of its own (created with
    fDeferToCImpl = True), completed on the spot and appended to g_aoMcBlocks.

    sCode is the code string containing the statement,
    offBeginStatementInCodeStr the offset of the statement within sCode,
    offBeginStatementInLine the offset handed to the McBlock constructor, and
    cParams the parameter count taken from the macro name.

    Returns True.  Problems are reported thru self.raiseError(), which is
    expected not to return.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN block starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.  The argument list is checked against cParams + 4 entries.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # Cut the last line right after the first semicolon.
        offSemicolon = asLines[-1].find(';');
        assert offSemicolon >= 0;
        asLines[-1] = asLines[-1][:offSemicolon + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5532
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    The functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros, so asArgs is the argument list of such a macro
    invocation with the macro name as the 0th entry.

    Returns True.
    """
    # The start of a new function completes the previous one, if any, on the
    # line just before the current one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Make the new function the current one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5548
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the piece of code to examine and offLine its offset within the
    current source line.

    Returns True when the code starts a decoder or worker function, or when it
    contains 'IEM_MC_'.  Returns False otherwise.
    """
    # Nothing here can be a macro invocation unless there is a '(' past column 0.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );

        fStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Check for worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                   ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            # The L0 variants imply the vex_l_zero hint; complain if the mnemonic macro lacked it.
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None and len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1..a_Op<n>,]
        #                     a_fDisHints, a_fIemHints)
        # With the macro name as entry 0, the operands start at index 6 and
        # the two hint arguments follow right after them.
        for cOperands, sMnemonicMacro in enumerate(( 'IEMOP_MNEMONIC0EX',
                                                     'IEMOP_MNEMONIC1EX',
                                                     'IEMOP_MNEMONIC2EX',
                                                     'IEMOP_MNEMONIC3EX',
                                                     'IEMOP_MNEMONIC4EX', )):
            asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
            if asArgs is not None:
                iHints = 6 + cOperands;
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[iHints], asArgs[iHints + 1], list(asArgs[6 : iHints]));

        # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1..a_Op<n>,] a_fDisHints, a_fIemHints)
        # Same layout, except that the operands start at index 4.
        for cOperands, sMnemonicMacro in enumerate(( 'IEMOP_MNEMONIC0',
                                                     'IEMOP_MNEMONIC1',
                                                     'IEMOP_MNEMONIC2',
                                                     'IEMOP_MNEMONIC3',
                                                     'IEMOP_MNEMONIC4', )):
            asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
            if asArgs is not None:
                iHints = 4 + cOperands;
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[iHints], asArgs[iHints + 1], list(asArgs[4 : iHints]));

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode < 0:
        return False;

    for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
        sWhat    = oMatch.group(1);
        offMatch = oMatch.start();
        if sWhat == 'END':
            self.workerIemMcEnd(offLine + offMatch);
        elif sWhat == 'BEGIN':
            self.workerIemMcBegin(sCode, offMatch, offLine + offMatch);
        else:
            # The parameter count is the digit following 'DEFER_TO_CIMPL_'.
            self.workerIemMcDeferToCImplXRet(sCode, offMatch, offLine + offMatch,
                                             int(sWhat[len('DEFER_TO_CIMPL_')]));
    return True;
5692
def workerPreprocessorRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression is a single group of alternatives, one per macro
    name in self.dMacros.  Macros with an argument list must be followed by an
    opening parenthesis, the others must end on a word boundary.  With no
    macros, self.oReMacros is set to None.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + r'\s*\(');
        else:
            asAlternatives.append(sName + r'\b');

    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5713
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are consumed from self.asLines, advancing self.iLine.
    Only macros with IEM_MC_BEGIN or IEM_MC_END in their body are recorded in
    self.dMacros, everything else is ignored.

    Returns True for ignored macros, the workerPreprocessorRecreateMacroRegex
    result for recorded ones, and the self.error() result if the definition
    cannot be parsed.
    """
    assert sRest[-1] == '\n';

    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine   = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[0:-2].rstrip() + '\n' + sNextLine;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Use regex to split out the name, argument list and body.
    # If this fails, we assume it's a simple macro.
    #
    asArgs = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    # There is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Add the macro.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5774
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro is not in self.dMacros, otherwise the result of
    workerPreprocessorRecreateMacroRegex after removing it.
    """
    # Quick comment strip (anything from the first slash on) and isolate the name.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5793
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive.

    sDirective is the directive word without the hash and sRest what follows
    it.  An #elif is added to the aoElif list of the top entry of
    self.aoCppCondStack, the other directives push a new entry.

    Returns True.  Problems are reported thru self.raiseError().
    """
    fElif = sDirective == 'elif';

    #
    # Sanity check #elif.
    #
    if fElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # If using line continuation, just concat all the lines together,
    # stripping both the newline and escape characters.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine   = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[0:-2].rstrip() + ' ' + sNextLine;

    # Strip it of all comments and leading and trailing blanks.
    sRest = self.stripComments(sRest).strip();

    #
    # Stash it.
    #
    try:
        oPreprocCond = self.PreprocessorConditional(sDirective, sRest);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if fElif:
        self.aoCppCondStack[-1].aoElif.append(oPreprocCond);
    else:
        self.aoCppCondStack.append(oPreprocCond);

    return True;
5832
def workerPreprocessorElse(self):
    """
    Handles an #else directive.

    Flags the top entry of self.aoCppCondStack as being in its #else part.

    Returns True.  A stray or repeated #else is reported thru self.raiseError().
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oCurCond = self.aoCppCondStack[-1];
    if oCurCond.fInElse:
        self.raiseError('Another #else after #else');

    oCurCond.fInElse = True;
    return True;
5844
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive.

    Pops the top entry off self.aoCppCondStack.

    Returns True.  An #endif without an open conditional is reported thru
    self.raiseError().
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5854
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive.

    sLine is the whole source line and must contain a '#'.

    Returns the result of the worker for define, undef, if, ifdef, ifndef,
    elif, else and endif.  Returns False for any other directive.
    """
    cchLine = len(sLine);

    # Skip past the preprocessor hash and any blanks following it.
    offDirective = sLine.find('#');
    assert offDirective >= 0;
    offDirective += 1;
    while offDirective < cchLine and sLine[offDirective].isspace():
        offDirective += 1;

    # Extract the directive, it runs up to the next blank.
    offTail = offDirective;
    while offTail < cchLine and not sLine[offTail].isspace():
        offTail += 1;
    sDirective = sLine[offDirective:offTail];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Skip spaces following it to where the arguments/whatever starts.
    # Note! The last blank before the arguments is kept as part of the tail.
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Handle the directive.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5894
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is a match object for sLine whose first group is the macro name,
    followed by the opening parenthesis for macros taking arguments.

    Returns the line with the macro invocation replaced by its expansion.
    Problems are reported thru self.raiseError().
    """
    #
    # Get our bearings.
    #
    offMatch      = oMatch.start();
    offAfterMatch = oMatch.end();
    sName         = oMatch.group(1);
    assert sName == sLine[offMatch : offAfterMatch];
    fWithArgs     = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName]         # type: SimpleParser.Macro

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offAfterMatch:];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    # The scan starts just inside the opening parenthesis (level 1) and stops
    # at the matching closing one, splitting on commas at the outermost level.
    #
    asArgs    = [];
    cLevel    = 1;
    offCurArg = offAfterMatch;
    for offCur in range(offAfterMatch, len(sLine)):
        ch = sLine[offCur];
        if ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
        elif ch == ',' and cLevel == 1:
            asArgs.append(sLine[offCurArg:offCur].strip());
            offCurArg = offCur + 1;
    else:
        # Ran off the end of the line without finding the closing parenthesis.
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
5953
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, expanding known macros, handing
    preprocessor directives, code fragments and multi-line comments to the
    respective workers.

    Returns number or errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Whether the line starts out in code rather than inside a comment.
        fInCode = self.iState == self.kiCode;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if fInCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
            continue;

        #
        # Lines with a slash in them may contain comments.
        #
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # A line is taken as code followed by a C++ comment when we are
            # in code and the first slash is immediately followed by another.
            fCppComment = offSlash + 1 < len(sLine) and sLine[offSlash + 1] == '/' and fInCode;
            if not fCppComment:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            offHit = sLine.find('/*', offEnd);

                        if offHit < 0:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);
                        else:
                            # Code up to the comment start, then switch to comment mode.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit < 0:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                        else:
                            # End of the comment, parse it and go back to code mode.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);
            continue;

        # No slash, but append the line if in multi-line comment.
        if self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;
            continue;

        # Everything below only applies to code.
        if not fInCode:
            continue;

        # No slash, but check code line for relevant macro.
        if sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6059
# Some sanity checking: Every flag listed in the SimpleParser EFLAGS classes
# must be a known EFLAGS mnemonic.
for sClass, dLists in SimpleParser.kdEFlagsClasses.items():
    for sAttrib, asFlags in dLists.items():
        for sFlag in asFlags:
            assert sFlag in g_kdEFlagsMnemonics, \
                   'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);

## The parsed content of IEMAllInstCommonBodyMacros.h.
# Set by __parseFileByName on its first call.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6068
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the file to parse, sDefaultMap and sHostArch are passed on to
    the SimpleParser constructor.  On the first call
    IEMAllInstCommonBodyMacros.h is parsed too and cached in
    g_oParsedCommonBodyMacros for use with this and all later files.

    Returns a tuple with the SimpleParser.parse() result and the parser.
    Raises Exception if a file cannot be opened or read, and ParserException
    on parser trouble (after printing it to stderr).
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file: Next to the source file, or failing that, next to this script.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to provide macros, so finding
        # instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The arguments follow the order of the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6130
6131
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry of an instruction is looked up first in
    g_dAllInstructionsByStat and then in g_dAllInstructionsByFunction, and
    the tests of the instructions found are appended to the aoTests of the
    referencing instruction.

    Returns the number of errors, after writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # Resolve the reference, the statistics name takes precedence over the function name.
            oSrcInstr = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oSrcInstr,] if oSrcInstr else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6159
6160
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0.
    """
    if g_aoOnlyTestInstructions:
        for oInstr in g_aoAllInstructions:
            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
                oInstr.aoTests = [];
    return 0;
6172
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
# Each entry is a ( filename, default map name, file set ) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h',         'one',          -1, ),
    ( 'IEMAllInstOneByte.cpp.h',        'one',           1, ),
    ( 'IEMAllInst3DNow.cpp.h',          '3dnow',         2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h',      'two0f',         2, ),
    ( 'IEMAllInstThree0f38.cpp.h',      'three0f38',     3, ),
    ( 'IEMAllInstThree0f3a.cpp.h',      'three0f3a',     3, ),
    ( 'IEMAllInstVexMap1.cpp.h',        'vexmap1',       4, ),
    ( 'IEMAllInstVexMap2.cpp.h',        'vexmap2',       4, ),
    ( 'IEMAllInstVexMap3.cpp.h',        'vexmap3',       4, ),
);
6185
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of ( filename, default map ) pairs.
    Filenames without a directory part that do not exist as given are looked
    up in the directory of this script.  sHostArch is passed on to
    __parseFileByName.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! The divisor is clamped to 1 so that an empty instruction list
    #       (no files given, for instance) doesn't cause a division by zero.
    cTotalInstr = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6216
6217
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is looked up by its base name (ignoring
    case) in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure, including files that cannot be classified.
    """
    # Look up default maps for the files and call __parseFilesWorker to do the job.
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sName = os.path.basename(sFilename).lower();
        for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
            if aoInfo[0].lower() == sName:
                sMap = aoInfo[1];
                break;
        else:
            sMap = None;
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6239
6240
def parseAll(sHostArch = None):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files and their default maps are taken from
    g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # Only the filename and the default map are needed, so drop the file set.
    asFilesAndDefaultMap = [aoInfo[0:2] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6249
6250
6251#
6252# Generators (may perhaps move later).
6253#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for one instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  It consists of the format
    string, one parser index per operand slot, the opcode enum value, one
    OP_PARM_xxx per operand slot and finally the DISOPTYPE_xxx flags.

    Returns the entry as a single string with the columns padded out to
    fixed offsets.
    """
    aoOperands   = oInstr.aoOperands;
    fFourOps     = len(aoOperands) > 3;
    sMacro       = 'OPVEX' if fFourOps else 'OP';
    cMaxOperands = 4       if fFourOps else 3;
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: macro + mnemonic, followed by a space and the comma
    # separated operand formats (if there are any operands).
    #
    asOpFormats = [];
    for oOperand in aoOperands:
        sOpFormat = g_kdOpTypes[oOperand.sType][2];
        if sOpFormat[0] != '%':             ## @todo remove upper() later.
            sOpFormat = sOpFormat.upper();  ## @todo remove upper() later.
        asOpFormats.append(sOpFormat);

    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOpFormats:
        sFormat += ' ' + ','.join(asOpFormats);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    iStart    = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dSimpleParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero parser index per operand.
        asColumns.extend(['0,'] * len(aoOperands));
    elif sEncoding in dSimpleParsers:
        asColumns.append(dSimpleParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # not already covered by a parser column added above.
    cHandled = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[cHandled:]]);

    # Pad the parser columns up to the operand count of the macro.
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');

    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: all hints mapping to DISOPTYPE_xxx defines OR'ed together, or
    # zero if there are none.  This column also closes the macro invocation.
    #
    asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    sFlags    = ' | '.join([sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')]);
    asColumns.append((sFlags or '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its fixed
    # offset, or one space after the previous column if that overflowed.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine = sLine.ljust(max(aoffColumns[iColumn], len(sLine) + 1)) + sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6378
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether trimming the None entries off both ends of the table is
    worthwhile.

    The short version is chosen when the number of leading plus trailing None
    entries is at least len(aoTableOrdered) // 30.
    NOTE(review): This has been described as 'more than 30%', but the
                  threshold tested here is a 30th of the table size.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For a short table these delimit the trimmed range, otherwise they
    cover the whole table, i.e. (0, len(aoTableOrdered), False).
    """
    _ = oMap; # Use this for overriding.

    cTotal = len(aoTableOrdered);

    # Determine how much we can trim off the end...
    cTrailing = next((cSkipped for cSkipped, oEntry in enumerate(reversed(aoTableOrdered)) if oEntry is not None),
                     cTotal);
    iEnd = cTotal - cTrailing;

    # ... and off the start (iEnd if there are no entries at all).
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # Too little to gain? Then output the full table.
    if iFirst + cTrailing < cTotal // 30:
        return (0, cTotal, False);
    return (iFirst, iEnd, True);
6400
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and then writes a DISOPCODE table plus a
    DISOPMAPDESC range record to oDstFile for each instruction map whose name
    starts with "vex".

    Returns exit code: 0 on success, 1 if parsing failed (the error and a
    traceback are then printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;


    #
    # The disassembler uses a slightly different table layout to save space:
    # maps using the 'byte+pfx' selector are split up into one map per prefix
    # variation (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Note! Collected, but not written anywhere by this function yet.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # Entries covered by one 16 opcode row.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets a comment giving its first index.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start each row with a comment (and a blank line, except for the first one).
            # Note! The masking assumes the entry counts are powers of two.
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row is invalid.  Skip all but one of its entries
                    # here, the increment at the bottom of the loop does the last.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of this opcode byte are invalid.  The block
                        # covers cEntriesPerByte entries, so skip that many less
                        # one (this used to be a hardcoded 3, which is only
                        # right for four entries per byte).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list) and len(oInstr) != 0:
                # Several instructions share the entry, just list them for now.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                               % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The declaration must use the exact same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6529
if __name__ == '__main__':
    # Script entry point: generate the tables to stdout and exit with the
    # status code returned by the generator.
    raise SystemExit(generateDisassemblerTables());
6532
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette