VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103898

Last change on this file since 103898 was 103898, checked in by vboxsync, 13 months ago

VMM/IEM: Fixed todo on vpextrw's mnemonic and reordered the MCs a bit more sensibly (C++ style not C with vars at top, because that's inefficient for the register allocator). bugref:9898

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 321.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103898 2024-03-18 15:31:00Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103898 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 hacks: there is no separate 'long' type any more, so alias it to
# int to keep code written against Python 2 working.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constant names and their values.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## A leading '!' on the value means the mnemonic denotes the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): DOS DEBUG style notation normally uses 'pe' for PF set and
    #               'po' for PF clear, i.e. the opposite polarity of the two
    #               entries below.  Mapping kept as-is - confirm against users.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':   0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':   0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':   0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':   0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':   0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':   0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':   0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':   0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':   0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':   0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':   0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':          0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':          0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':           0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':          0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':          0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':          0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':          0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':          0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':       0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT':  0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':         0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':         0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':        0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':      0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':         0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':         0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':          0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          0x00000001,
    'XSAVE_C_SSE':          0x00000002,
    'XSAVE_C_YMM':          0x00000004,
    'XSAVE_C_BNDREGS':      0x00000008,
    'XSAVE_C_BNDCSR':       0x00000010,
    'XSAVE_C_OPMASK':       0x00000020,
    'XSAVE_C_ZMM_HI256':    0x00000040,
    'XSAVE_C_ZMM_16HI':     0x00000080,
    'XSAVE_C_PKRU':         0x00000200,
    'XSAVE_C_LWP':          0x4000000000000000,
    'XSAVE_C_X':            0x8000000000000000,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rDX':      [],
    'CL':       [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],

    # fixed values.
    '1':        [],
};
218
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',     'cl',   'REG_CL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),

    # Fixed values.
    '1':            ( '',                   '1',      '1',    '1',       '',      ),
};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding.
##    - 1: list of operand locations, or None.
##    - 2: the opcodesub value.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]            opcodesub      ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],            '',            ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],            '11 mr/reg',   ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],            '!11 mr/reg',  ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '',            ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '11 mr/reg',   ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '!11 mr/reg',  ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],            '',            ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],            '11 mr/reg',   ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],            '!11 mr/reg',  ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '',            ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '11 mr/reg',   ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '!11 mr/reg',  ),
    'M':            ( 'ModR/M',     [ 'rm', ],                  '',            ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                  '',            ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                  '',            ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],              '',            ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],             '',            ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],            '',            ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],            '11 mr/reg',   ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],            '!11 mr/reg',  ),
    'R':            ( 'ModR/M',     [ 'reg', ],                 '',            ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '',            ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '11 mr/reg',   ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '!11 mr/reg',  ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '',            ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '11 mr/reg',   ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '!11 mr/reg',  ),
    'VEX_MRI':      ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],     '',            ),
    'VEX_MRI_REG':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],     '11 mr/reg',   ),
    'VEX_MRI_MEM':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],     '!11 mr/reg',  ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                  ''             ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                  ''             ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                  ''             ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                 ''             ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '',            ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '11 mr/reg',   ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '!11 mr/reg',  ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '',            ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '11 mr/reg',   ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '!11 mr/reg',  ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '',            ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '11 mr/reg',   ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '!11 mr/reg',  ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '',            ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '11 mr/reg',   ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '!11 mr/reg',  ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '',            ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '11 mr/reg',   ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '!11 mr/reg',  ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],    '',            ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],    '11 mr/reg',   ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],    '!11 mr/reg',  ),

    'FIXED':        ( 'fixed',      None,                       '',            ),
};
436
## \@oppfx values.
g_kdPrefixes = {
    'none':  [],
    '0x66':  [],
    '0xf3':  [],
    '0xf2':  [],
    '!0xf3': [], # special case for bsf/tzcnt
};
445
## Special \@opcode tag values.
g_kdSpecialOpcodes = {
    '/reg':       [],
    'mr/reg':     [],
    '11 /reg':    [],
    '!11 /reg':   [],
    '11 mr/reg':  [],
    '!11 mr/reg': [],
};
455
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',    ],
    '11 mr/reg':            [ '11 mr/reg',          '',    ],
    '11':                   [ '11 mr/reg',          '',    ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',    ],
    '!11':                  [ '!11 mr/reg',         '',    ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',  ],
    'w=0':                  [ 'rex.w=0',            '',    ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ', ],
    'w=1':                  [ 'rex.w=1',            '',    ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',  ],
    # Not an alias: the real value must be 'vex.l=1' itself, otherwise the
    # entry resolves to the 'vex.l=0' row.
    'vex.l=1':              [ 'vex.l=1',            'L1',  ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',  ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',  ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',  ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',  ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',    ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',    ],
};
478
## Valid values for \@openc
g_kdEncodings = {
    'ModR/M':     [ 'BS3CG1ENC_MODRM',     ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':      [ 'BS3CG1ENC_FIXED',     ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':  [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':     [ None,                  ], ##< Prefix
};
487
## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
497
## Recognized CPU names.
g_kdCpuNames = {
    '8086':  (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
505
## \@opcpuid
## Maps the tag value to the name of the corresponding X86_CPUID_XXX feature constant.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the neighbouring feature bit (DTES64, CPLDS
    # and TM2 respectively) rather than the feature the key stands for.
    'pclmul':     'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':    'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':     'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':     'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':       'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
551
## \@ophints values.
## Maps the hint to the DISOPTYPE_XXX constant name; an empty string where
## no constant name is given.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
594
## \@opxcpttype values (see SDMv2 2.4, 2.7).
g_kdXcptTypes = {
    'none':  [],
    '1':     [],
    '2':     [],
    '3':     [],
    '4':     [],
    '4UA':   [],
    '5':     [],
    '5LZ':   [], # LZ = VEX.L must be zero.
    '6':     [],
    '7':     [],
    '7LZ':   [],
    '8':     [],
    '11':    [],
    '12':    [],
    'E1':    [],
    'E1NF':  [],
    'E2':    [],
    'E3':    [],
    'E3NF':  [],
    'E4':    [],
    'E4NF':  [],
    'E5':    [],
    'E5NF':  [],
    'E6':    [],
    'E6NF':  [],
    'E7NF':  [],
    'E9':    [],
    'E9NF':  [],
    'E10':   [],
    'E11':   [],
    'E12':   [],
    'E12NF': [],
};
630
631
632def _isValidOpcodeByte(sOpcode):
633 """
634 Checks if sOpcode is a valid lower case opcode byte.
635 Returns true/false.
636 """
637 if len(sOpcode) == 4:
638 if sOpcode[:2] == '0x':
639 if sOpcode[2] in '0123456789abcdef':
640 if sOpcode[3] in '0123456789abcdef':
641 return True;
642 return False;
643
644
645class InstructionMap(object):
646 """
647 Instruction map.
648
649 The opcode map provides the lead opcode bytes (empty for the one byte
650 opcode map). An instruction can be member of multiple opcode maps as long
651 as it uses the same opcode value within the map (because of VEX).
652 """
653
654 kdEncodings = {
655 'legacy': [],
656 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
657 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
658 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
659 'xop8': [], ##< XOP prefix with vvvvv = 8
660 'xop9': [], ##< XOP prefix with vvvvv = 9
661 'xop10': [], ##< XOP prefix with vvvvv = 10
662 };
663 ## Selectors.
664 ## 1. The first value is the number of table entries required by a
665 ## decoder or disassembler for this type of selector.
666 ## 2. The second value is how many entries per opcode byte if applicable.
667 kdSelectors = {
668 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
669 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
670 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
671 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
672 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
673 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
674 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
675 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
676 };
677
678 ## Define the subentry number according to the Instruction::sPrefix
679 ## value for 'byte+pfx' selected tables.
680 kiPrefixOrder = {
681 'none': 0,
682 '0x66': 1,
683 '0xf3': 2,
684 '0xf2': 3,
685 };
686
687 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
688 sEncoding = 'legacy', sDisParse = None):
689 assert sSelector in self.kdSelectors;
690 assert sEncoding in self.kdEncodings;
691 if asLeadOpcodes is None:
692 asLeadOpcodes = [];
693 else:
694 for sOpcode in asLeadOpcodes:
695 assert _isValidOpcodeByte(sOpcode);
696 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
697
698 self.sName = sName;
699 self.sIemName = sIemName;
700 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
701 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
702 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
703 self.aoInstructions = [] # type: Instruction
704 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
705
706 def copy(self, sNewName, sPrefixFilter = None):
707 """
708 Copies the table with filtering instruction by sPrefix if not None.
709 """
710 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
711 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
712 else self.sSelector,
713 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
714 if sPrefixFilter is None:
715 oCopy.aoInstructions = list(self.aoInstructions);
716 else:
717 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
718 return oCopy;
719
720 def getTableSize(self):
721 """
722 Number of table entries. This corresponds directly to the selector.
723 """
724 return self.kdSelectors[self.sSelector][0];
725
726 def getEntriesPerByte(self):
727 """
728 Number of table entries per opcode bytes.
729
730 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
731 the others it will just return 1.
732 """
733 return self.kdSelectors[self.sSelector][1];
734
735 def getInstructionIndex(self, oInstr):
736 """
737 Returns the table index for the instruction.
738 """
739 bOpcode = oInstr.getOpcodeByte();
740
741 # The byte selectors are simple. We need a full opcode byte and need just return it.
742 if self.sSelector == 'byte':
743 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
744 return bOpcode;
745
746 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
747 if self.sSelector == 'byte+pfx':
748 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
749 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
750 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
751
752 # The other selectors needs masking and shifting.
753 if self.sSelector == '/r':
754 return (bOpcode >> 3) & 0x7;
755
756 if self.sSelector == 'mod /r':
757 return (bOpcode >> 3) & 0x1f;
758
759 if self.sSelector == 'memreg /r':
760 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
761
762 if self.sSelector == '!11 /r':
763 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
764 return (bOpcode >> 3) & 0x7;
765
766 if self.sSelector == '11 /r':
767 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
768 return (bOpcode >> 3) & 0x7;
769
770 if self.sSelector == '11':
771 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
772 return bOpcode & 0x3f;
773
774 assert False, self.sSelector;
775 return -1;
776
def getInstructionsInTableOrder(self):
    """
    Returns the instructions of this map arranged by table index.

    The result is a list with getTableSize() entries.  An entry is:
        - None when no instruction was specified for that index,
        - a single instruction when exactly one maps to the index, or
        - a list of instructions when several share the index (special
          encodings where e.g. a prefix like REX.W, or the CPU, selects
          between different instructions in the same place).

    Instructions without an opcode (sOpcode unset/empty) are left out.
    """
    cEntries  = self.getTableSize();
    aoEntries = [None] * cEntries;

    for oInstr in self.aoInstructions:
        if not oInstr.sOpcode:
            continue;

        idxEntry = self.getInstructionIndex(oInstr);
        assert idxEntry < cEntries, str(idxEntry);

        oOccupant = aoEntries[idxEntry];
        if oOccupant is None:
            aoEntries[idxEntry] = oInstr;               # First instruction for this slot.
        elif isinstance(oOccupant, list):
            oOccupant.append(oInstr);                   # Third or later: extend the list.
        else:
            aoEntries[idxEntry] = [oOccupant, oInstr];  # Second: promote slot to a list.

    return aoEntries;
808
809
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by the underscore separated words of
    self.sName, each converted as follows:
        - 'm' / 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and two digit
          lowercase hex words are appended verbatim with a '_' in front.
        - Any other word gets 'grp' and 'map' capitalized, has its first
          character uppercased and is appended without separator.
    """
    ksHexDigits = '0123456789abcdef';
    sTable = 'g_aDisas';
    for sPart in self.sName.split('_'):
        fIsModSuffix = sPart in ('m', 'r');
        fIsHexByte   = len(sPart) == 2 and sPart[0] in ksHexDigits and sPart[1] in ksHexDigits;
        if fIsModSuffix or fIsHexByte:
            sTable += '_' + sPart;
        else:
            sPart   = sPart.replace('grp', 'Grp').replace('map', 'Map');
            sTable += sPart[0].upper() + sPart[1:];
    return sTable;
827
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name (see getDisasTableName) with the 'g_aDisas'
    prefix swapped for 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas');
    return sRangeBase + 'Range';
833
def isVexMap(self):
    """ Returns True if this is a VEX map, i.e. the encoding name starts with 'vex'. """
    fVex = self.sEncoding.startswith('vex');
    return fVex;
837
838
class TestType(object):
    """
    Test value type.

    This base class handles integer-like values.  The fUnsigned constructor
    argument sets the default stance on zero vs. sign extension; a value that
    starts with '+' or '-' always requests sign extension regardless of it.
    """

    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        # The normal value sizes in bytes, smallest first.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception; the message is also kept in sMessage. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its bitwise inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Gets the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class only ever produces the single entry form.  The bytearray holds
        the value in little endian byte order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts where the '0x' goes.
        fExplicitSign = sValue[0] in ('-', '+');
        fSignExtend   = True if fExplicitSign else not self.fUnsigned;
        offDigits     = 1 if fExplicitSign else 0;
        fHex          = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  A negative value is turned into its two's
        # complement by formatting (-n - 1) and inverting each digit.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;      # Make sure the top bit is set.

        # Find the natural width (in hex digits): The first normal size that
        # fits, or else a multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad up to the natural width with zeros or, for negative values, ones.
        cMissing = cNaturalDigits - cDigits;
        if cMissing != 0:
            sHex = (('f' if fNegative else '0') * cMissing) + sHex;

        # Walk the digit pairs from the end so the least significant byte comes first.
        abValue = bytearray([int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair; never the case for this base type.
        """
        _ = sValue;
        return False;
963
964
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic translating to a '!' prefixed constant
    requests that flag to be cleared, any other that it be set.
    """

    ## The mnemonics for flags in the zero (clear) state.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Parses the flag list in sValue.

        Returns the set-mask alone when no flags are cleared, otherwise an
        AND+OR pair with the clear-mask entry first and the set-mask second.
        Raises BadValue on unknown flag mnemonics.
        """
        fSetMask   = 0;
        fClearMask = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] != '!':
                fSetMask   |= g_kdX86EFlagsConstants[sConstant];
            else:
                fClearMask |= g_kdX86EFlagsConstants[sConstant[1:]];

        aoSet = TestType.get(self, '0x%x' % (fSetMask,));
        if fClearMask == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fClearMask,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair, i.e. whether any of the zero value
        flag mnemonics occurs (as a substring) in it.
        """
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags);
1000
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (e.g. CR0).

    Values are comma separated flag names.  Each name is uppercased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues; the resulting
    values are ORed together.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Parses the flag list in sValue, returning the combined value in the
        same form as TestType.get.  Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstantName = self.sConstantPrefix + sFlag.upper();
            fFlagValue    = self.kdConstantsAndValues.get(sConstantName, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlagValue;
        return TestType.get(self, '0x%x' % (fCombined,));
1021
1022
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each modifier consists of a field name (key in kdFields), an assignment
    operator (entry in kasOperators), a value string and a type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is constructed with the TestType default of
    #               fUnsigned=True and thus behaves exactly like 'uint' here;
    #               confirm whether fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be plain strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1288
1289
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a 'variable op value' expression restricting when a test
    applies.  The valid variables and values are given by kdVariables, the
    valid operators by kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':      'size_o16',
            'o32':      'size_o32',
            'o64':      'size_o64',
        },
        # VEX.L value.
        'vex.l': {
            '0':        'vexl_0',
            '1':        'vexl_1',
        },
        # Execution ring.
        'ring': {
            '0':        'ring_0',
            '1':        'ring_1',
            '2':        'ring_2',
            '3':        'ring_3',
            '0..2':     'ring_0_thru_2',
            '1..3':     'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':       'code_64bit',
            '32':       'code_32bit',
            '16':       'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':     'mode_real',
            'prot':     'mode_prot',
            'long':     'mode_long',
            'v86':      'mode_v86',
            'smm':      'mode_smm',
            'vmx':      'mode_vmx',
            'svm':      'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':       'paging_on',
            'off':      'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':      'vendor_amd',
            'intel':    'vendor_intel',
            'via':      'vendor_via',
        },
    };
    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector; the three parts are validated against
        kdVariables and kasCompareOps by assertions only.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1386
1387
1388class InstructionTest(object):
1389 """
1390 Instruction test.
1391 """
1392
1393 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1394 self.oInstr = oInstr # type: InstructionTest
1395 self.aoInputs = [] # type: List[TestInOut]
1396 self.aoOutputs = [] # type: List[TestInOut]
1397 self.aoSelectors = [] # type: List[TestSelector]
1398
1399 def toString(self, fRepr = False):
1400 """
1401 Converts it to string representation.
1402 """
1403 asWords = [];
1404 if self.aoSelectors:
1405 for oSelector in self.aoSelectors:
1406 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1407 asWords.append('/');
1408
1409 for oModifier in self.aoInputs:
1410 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1411
1412 asWords.append('->');
1413
1414 for oModifier in self.aoOutputs:
1415 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1416
1417 if fRepr:
1418 return '<' + ' '.join(asWords) + '>';
1419 return ' '.join(asWords);
1420
1421 def __str__(self):
1422 """ Provide string represenation. """
1423 return self.toString(False);
1424
1425 def __repr__(self):
1426 """ Provide unambigious string representation. """
1427 return self.toString(True);
1428
class Operand(object):
    """
    Instruction operand.

    Pairs a location (key in g_kdOpLocations) with an operand type (key in
    g_kdOpTypes); both are checked by assertions on construction.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1443
1444
1445
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction: its encoding (maps, prefix,
    opcode), operands, EFLAGS usage, CPU requirements, tests, as well as
    information about where and how it is implemented.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks     = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' items.  Items
        whose value is None are left out, as are optional ones that are
        empty/unset.  When fRepr is True the result is put in angle brackets.
        """
        # Collect (name, value) pairs in output order.
        atFields = [('opcode', self.sOpcode)];
        if self.sPrefix:
            atFields.append(('prefix', self.sPrefix));
        atFields.append(('mnemonic', self.sMnemonic));
        atFields.extend([('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperand, oOperand in enumerate(self.aoOperands)]);
        if self.aoMaps:
            atFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        atFields.append(('encoding', self.sEncoding));
        if self.dHints:
            atFields.append(('hints', ','.join(self.dHints.keys())));
        atFields.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            atFields.append(('cpuid', ','.join(self.asCpuIds)));
        atFields.append(('group', self.sGroup));
        if self.fUnused:
            atFields.append(('unused', 'True'));
        if self.fInvalid:
            atFields.append(('invalid', 'True'));
        atFields.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            atFields.append(('fStub', 'True'));
        if self.fUdStub:
            atFields.append(('fUdStub', 'True'));
        if self.cOpTags:
            atFields.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            atFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            atFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        # Format the ones that have a value.
        sBody = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in atFields if sValue is not None]);
        if fRepr:
            return '<' + sBody + '>';
        return sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The given map, opcode, sub-opcode and prefix override those of this
        instruction when specified.  The copy gets this instruction as parent.
        List and dictionary attributes are copied shallowly; the flag lists
        (asFlXxx) are shared with the original, and the MC block list is not
        carried over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        # The parent link and the attributes that can be overridden.
        oCopy.oParent    = self;
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        # Attributes that are simply taken over by reference.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists getting a new list object with the same elements. (Deeper copy for operands and tests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));

        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms are '0xNN' (the value itself), '/r' (r << 3),
        '11/r' ((r << 3) | 0xc0) and '!11/r' ((r << 3) | 0x80).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.
        cchOpcode = len(sOpcode);

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The /r form:
        if cchOpcode == 2 and sOpcode.startswith('/') and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3;

        # The 11/r form:
        if cchOpcode == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if cchOpcode == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (zero if None or empty).
        """
        if not asFlags:
            return 0;
        uMask = 0;
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style ('0' if
        None or empty).
        """
        if not asFlags:
            return '0';
        asConstants = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant);
        return ' | '.join(asConstants);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1721
1722
1723
## All the instructions.
g_aoAllInstructions             = []    # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat        = {}    # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list, so several instructions can share a function name.
g_dAllInstructionsByFunction    = {}    # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions        = []    # type: List[Instruction]
1735
## Instruction maps.
## The two dictionaries following the list index the same InstructionMap
## objects by their sName and sIemName attributes respectively.
g_aoInstructionMaps = [
    # One byte map and the groups hanging off it.
    InstructionMap('one',        'g_apfnOneByteMap',                                sSelector = 'byte'),
    InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',],                         sSelector = '/r'),
    InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',],                         sSelector = '/r'),
    InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',],                         sSelector = '/r'),
    InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',],                         sSelector = '/r'),
    InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',],                         sSelector = '/r'),
    InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',],                         sSelector = '/r'),
    InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',],                         sSelector = '/r'),
    InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',],                         sSelector = '/r'),
    InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',],                         sSelector = '/r'),
    InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',],                         sSelector = '/r'),
    InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',],                         sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',],                         sSelector = '/r'),
    InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',],                         sSelector = '/r'),
    InstructionMap('grp4',       asLeadOpcodes = ['0xfe',],                         sSelector = '/r'),
    InstructionMap('grp5',       asLeadOpcodes = ['0xff',],                         sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',],                         sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',],                         sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',],                         sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',],                         sSelector = '11'),    # xbegin

    # Two byte map (0x0f) and its groups.
    InstructionMap('two0f',      'g_apfnTwoByteMap',    asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6',       'g_apfnGroup6',        asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m',     'g_apfnGroup7Mem',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r',                            asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8',                              asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9',       'g_apfnGroup9RegReg',  asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10',      None,                  asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12',      'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13',      'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14',      'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15',      'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16',                             asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17',                            asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP',                              asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # Three byte maps.
    InstructionMap('three0f38',  'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a',  'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX maps and groups.
    InstructionMap('vexmap1',    'g_apfnVexMap1',          sEncoding = 'vex1'),
    InstructionMap('vexgrp12',   'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13',   'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14',   'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15',   'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17',   'g_apfnVexGroup17_f3',    sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2',    'g_apfnVexMap2',          sEncoding = 'vex2'),
    InstructionMap('vexmap3',    'g_apfnVexMap3',          sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8',    sEncoding = 'xop8'),
    InstructionMap('xopmap9',    sEncoding = 'xop9'),
    InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10',   sEncoding = 'xop10'),
    InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
g_dInstructionMaps          = dict((oMap.sName,    oMap) for oMap in g_aoInstructionMaps);
g_dInstructionMapsByIemName = dict((oMap.sIemName, oMap) for oMap in g_aoInstructionMaps);
1801
1802
1803#
1804# Decoder functions.
1805#
1806
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function was defined and the raw lines it is made
    up of.  This is mainly used for scoping searches for variables used in
    microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording the end line and the raw lines.

        May only be called once (asserted via iEndLine still being -1).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1829
1830
1831#
1832# "Microcode" statements and blocks
1833#
1834
class McStmt(object):
    """
    Statement in a microcode block.

    Holds the statement name and its parameter strings, and knows how to
    render itself back into code.
    """
    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (strings).
        self.asParams = asParams;
        ## Free for the user, initially None.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement: indent, name, the comma separated
        parameters in parentheses, a semicolon and a newline.
        """
        return '%s%s(%s);\n' % (' ' * cchIndent, self.sName, ', '.join(self.asParams),);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, concatenating the renderCode() output
        of each of them.
        """
        return ''.join(oStmt.renderCode(cchIndent) for oStmt in aoStmts);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having any of the given names,
        None if there is no such statement.

        Conditional statements (McStmtCond) that do not match themselves are
        searched recursively, if-branch before else-branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if isinstance(oStmt, McStmtCond):
                oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                        or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
                if oHit:
                    return oHit;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches the given list of statements for the names in the dictionary,
        adding each one found to dRet with an occurrence count.  Both branches
        of conditional statements (McStmtCond) are included in the search.

        Returns the total number of hits.
        """
        cTotal = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cTotal += 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cTotal += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cTotal;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. if the name starts with 'C++'. """
        return self.sName.startswith('C++');
1897
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).

    The branches are kept as private list copies of whatever is passed in.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        ## Statements in the if-branch.
        self.aoIfBranch            = list(aoIfBranch)   if aoIfBranch   is not None else [];
        ## Statements in the else-branch, empty if there is none.
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        ## User specific IF-branch annotation.
        self.oIfBranchAnnotation   = None;
        ## User specific ELSE-branch annotation.
        self.oElseBranchAnnotation = None;
        ## '_NATIVE' for IEM_MC_NATIVE_IF, otherwise empty.  Used to form the
        ## matching IEM_MC[_NATIVE]_ELSE / IEM_MC[_NATIVE]_ENDIF names.
        self.sNativeInfix          = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with both branches indented by four more
        characters.  The else part is only emitted if the else-branch is
        non-empty.
        """
        sIndent = ' ' * cchIndent;
        asRet = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asRet);
1918
class McStmtNativeIf(McStmtCond):
    """
    IEM_MC_NATIVE_IF

    The single statement parameter is the architecture list joined by '|',
    or '0' if the list is empty.
    """
    def __init__(self, sName, asArchitectures):
        sCondition = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sCondition,]);
        ## The architecture names as passed by the caller.
        self.asArchitectures = asArchitectures;
1924
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type.
        self.sType    = sType;
        ## The variable name.
        self.sVarName = sVarName;
        ## The assigned / const value, None if there is none.
        self.sValue   = sValue;
1932
class McStmtArg(McStmtVar):
    """
    IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF

    The constant value, if any, is stored as the sValue of the McStmtVar base.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1941
class McStmtCall(McStmt):
    """
    IEM_MC_CALL_*

    iFnParam is the index into asParams of the function being called and
    iRcNameParam, when not negative, the index of the return code variable.
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index of the function parameter.
        self.idxFn     = iFnParam;
        ## Index of the parameter following the function one.
        self.idxParams = iFnParam + 1;
        ## The function being called.
        self.sFn       = asParams[iFnParam];
        ## The asParams entry at iRcNameParam, None if that index is negative.
        self.iRcName   = asParams[iRcNameParam] if iRcNameParam >= 0 else None;
1950
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The two parameters are the tested and the modified flags of the given
    instruction, both in C style.
    """
    def __init__(self, oInstruction):
        sFlTested   = oInstruction.getTestedFlagsCStyle();
        sFlModified = oInstruction.getModifiedFlagsCStyle();
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', [sFlTested, sFlModified,]);
1958
1959
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the annotation appended when rendering ('C++ decode' if
        ## set, 'C++ normal' if clear).
        self.fDecode   = fDecode;
        ## Extra indentation added to what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with an annotation comment appended in front of each
        newline (including the one added at the end).
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1977
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'fn(arg, ...);' followed by the decode/normal
        annotation comment and a newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sIndent     = ' ' * (cchIndent + self.cchIndent);
        return sIndent + self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');' + sAnnotation;
1997
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is 'C++/if' and the condition expression is the one
    and only parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the annotation used when rendering ('C++ decode' / 'C++ normal').
        self.fDecode   = fDecode;
        ## Extra indentation added to what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines and the
        branches indented by four more characters.  The else part is only
        emitted if the else-branch is non-empty.
        """
        cchIndent  += self.cchIndent;
        sIndent     = ' ' * cchIndent;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asRet);
2020
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    The statement name is 'C++/preproc' and fDecode is always False.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is plus a newline; the indentation is not applied. """
        sDirective = self.asParams[0];
        return sDirective + '\n';
2030
2031
## IEM_MC_F_XXX values.
## Each flag maps to a tuple of further flags that get set together with it
## (see the IEM_MC_BEGIN parsing).
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086':           (),
    'IEM_MC_F_MIN_186':             (),
    'IEM_MC_F_MIN_286':             (),
    'IEM_MC_F_NOT_286_OR_OLDER':    (),
    'IEM_MC_F_MIN_386':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM':         ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II':      ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE':            ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT':               ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT':           (),
};
## IEM_CIMPL_F_XXX values.
## Each flag maps to a tuple of further flags that get set together with it
## (see McBlock.parseCImplFlags).
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT':                (),
    'IEM_CIMPL_F_BRANCH_INDIRECT':              (),
    'IEM_CIMPL_F_BRANCH_RELATIVE':              (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL':           (),
    'IEM_CIMPL_F_BRANCH_FAR':                   (),
    'IEM_CIMPL_F_BRANCH_ANY':                   ( 'IEM_CIMPL_F_BRANCH_DIRECT',
                                                  'IEM_CIMPL_F_BRANCH_INDIRECT',
                                                  'IEM_CIMPL_F_BRANCH_RELATIVE', ),
    'IEM_CIMPL_F_BRANCH_STACK':                 (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR':             (),
    'IEM_CIMPL_F_MODE':                         (),
    'IEM_CIMPL_F_RFLAGS':                       (),
    'IEM_CIMPL_F_INHIBIT_SHADOW':               (),
    'IEM_CIMPL_F_STATUS_FLAGS':                 (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER':              (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE':             (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER':   ( 'IEM_CIMPL_F_CHECK_IRQ_BEFORE',
                                                  'IEM_CIMPL_F_CHECK_IRQ_AFTER', ),
    'IEM_CIMPL_F_VMEXIT':                       (),
    'IEM_CIMPL_F_FPU':                          (),
    'IEM_CIMPL_F_REP':                          (),
    'IEM_CIMPL_F_IO':                           (),
    'IEM_CIMPL_F_END_TB':                       (),
    'IEM_CIMPL_F_XCPT':                         ( 'IEM_CIMPL_F_BRANCH_INDIRECT',
                                                  'IEM_CIMPL_F_BRANCH_FAR',
                                                  'IEM_CIMPL_F_MODE',
                                                  'IEM_CIMPL_F_RFLAGS',
                                                  'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE':     (),
};
2075class McBlock(object):
2076 """
2077 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2078 """
2079
## @name Macro expansion types.
## @{
## No macro expansion.
kiMacroExp_None    = 0;
## Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
kiMacroExp_Entire  = 1;
## Partial/mixed (cmpxchg16b), safe to assume single block.
kiMacroExp_Partial = 2;
## @}
2086
def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
             oInstruction = None, cchIndent = None, fDeferToCImpl = False):
    #
    # Where the block was found.
    #
    ## The source file containing the block.
    self.sSrcFile      = sSrcFile;
    ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
    self.iBeginLine    = iBeginLine;
    ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
    self.offBeginLine  = offBeginLine;
    ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
    ## This is -1 until complete() is called.
    self.iEndLine      = -1;
    ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
    self.offEndLine    = 0;
    ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
    self.offAfterEnd   = 0;
    ## Indentation level of the block.
    ## Note! Falls back on offBeginLine when cchIndent is None as well as when it is zero.
    self.cchIndent     = cchIndent if cchIndent else offBeginLine;
    ## The raw lines the block is made up of.
    self.asLines       = []              # type: List[str]

    #
    # What the block belongs to.
    #
    ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
    self.fDeferToCImpl = fDeferToCImpl;
    ## The function the block resides in.
    self.oFunction     = oFunction;
    ## The name of the function the block resides in.  DEPRECATED.
    self.sFunction     = oFunction.sName;
    ## The block number within the function.
    self.iInFunction   = iInFunction;
    ## The instruction this block is associated with - can be None.
    self.oInstruction  = oInstruction    # type: Instruction
    ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
    ## kiMacroExp_Entire, kiMacroExp_Partial).
    self.iMacroExp     = self.kiMacroExp_None;

    #
    # Decoding results, filled in by the statement parsers.
    #
    ## IEM_MC_BEGIN: Argument count.
    self.cArgs         = -1;
    ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoArgs        = []              # type: List[McStmtArg]
    ## IEM_MC_BEGIN: Locals count.
    self.cLocals       = -1;
    ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoLocals      = []              # type: List[McStmtVar]
    ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
    self.dsMcFlags     = {}              # type: Dict[str, bool]
    ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
    self.dsCImplFlags  = {}              # type: Dict[str, bool]
    ## Decoded statements in the block.
    self.aoStmts       = []              # type: List[McStmt]
2132
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording where it ends and the raw
    lines it is made up of.

    May only be called once (asserted via iEndLine still being -1).
    """
    assert self.iEndLine == -1;
    (self.iEndLine, self.offEndLine, self.offAfterEnd) = (iEndLine, offEndLine, offAfterEnd);
    self.asLines = asLines;
2142
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The line reported is iBeginLine plus the number of newlines preceding
    off, the column is the one-based position of off within its line.
    """
    cNewlinesBefore = sRawCode.count('\n', 0, off);
    offLineStart    = sRawCode.rfind('\n', 0, off) + 1;    # rfind gives -1 on the first line -> 0.
    iColumn         = off - offLineStart + 1;
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cNewlinesBefore, iColumn, sMessage,));
2149
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException) for the statement
    sName, using the first line of the block as location.
    """
    sLocation = '%s:%d' % (self.sSrcFile, self.iBeginLine,);
    raise ParserException('%s: %s: parsing error: %s' % (sLocation, sName, sMessage,));
2153
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks the parameter count of the statement sName.

    Returns True if asParams has exactly cParamsExpected entries, otherwise
    a ParserException is raised.
    """
    cParams = len(asParams);
    if cParams == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParams,));
2160
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """
    Generic statement parser.

    Returns a plain McStmt object for the given name and parameters; the
    block (oSelf) is not used.
    """
    _ = oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2166
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """
    Generic conditional statement parser.

    Returns a plain McStmtCond object (no branches yet) for the given name
    and parameters; the block (oSelf) is not used.
    """
    _ = oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2172
## The architecture values accepted by IEM_MC_NATIVE_IF (see parseMcNativeIf).
## Only the keys are used, the values do not matter.
kdArchVals = {
    'RT_ARCH_VAL_X86':      True,
    'RT_ARCH_VAL_AMD64':    True,
    'RT_ARCH_VAL_ARM32':    True,
    'RT_ARCH_VAL_ARM64':    True,
    'RT_ARCH_VAL_SPARC32':  True,
    'RT_ARCH_VAL_SPARC64':  True,
};
2181
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a list of
    architecture values separated by '|'.  A statement error is raised for
    the first value not found in kdArchVals.

    Returns a McStmtNativeIf object.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    asArchitectures = [];
    if asParams[0].strip() != '0':
        for sArch in asParams[0].split('|'):
            sArch = sArch.strip();
            if sArch not in oSelf.kdArchVals:
                oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
            asArchitectures.append(sArch);
    return McStmtNativeIf(sName, asArchitectures);
2194
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The four parameters are the argument count, the locals count, the
    IEM_MC_F_XXX flags and the IEM_CIMPL_F_XXX flags ('0' for no flags).
    The counts and the flags (including those they imply) are recorded in
    the block.  A statement error is raised if the statement was seen
    before in this block or if an unknown IEM_MC_F_XXX flag is given.

    Returns a plain McStmt object.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    fSeenBefore = oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags;
    if fSeenBefore:
        oSelf.raiseStmtError(sName, 'Used more than once!');

    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;
    oSelf.cArgs   = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    if sMcFlags != '0':
        for sFlag in sMcFlags.split('|'):
            sFlag = sFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            # Set the flag itself as well as the ones it implies.
            oSelf.dsMcFlags[sFlag] = True;
            for sImpliedFlag in g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sImpliedFlag] = True;

    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2217
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """
    IEM_MC_ARG

    Parameters: type, variable name, argument number.
    Returns the McStmtArg object, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2225
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """
    IEM_MC_ARG_CONST

    Parameters: type, variable name, constant value, argument number.
    Returns the McStmtArg object, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2233
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_REF

    Parameters: type, variable name, referenced local, argument number.
    Returns the McStmtArg object (reference type 'local'), which is also
    appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalRef, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalRef, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2241
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS

    Parameters: argument name, local variable name, argument number.

    Note! This one is split up into an IEM_MC_LOCAL statement declaring the
          uint32_t local and an IEM_MC_ARG_LOCAL_REF statement referencing it.

    Returns a tuple with the two statements (local first).  They are also
    appended to the block's aoLocals and aoArgs respectively.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2253
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS

    Takes no parameters.

    Note! This is translated to an IEM_MC_ARG_CONST statement declaring
          pXState as argument 0.

    Returns the McStmtArg object, which is also appended to the block's aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    sType    = 'PX86XSAVEAREA';
    sVarName = 'pXState';
    sValue   = '&pVCpu->cpum.GstCtx.XState';
    oStmt = McStmtArg('IEM_MC_ARG_CONST', [sType, sVarName, sValue, '0'], sType, sVarName, 0, sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2263
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL

    Parameters: type, variable name.
    Returns the McStmtVar object, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2271
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_ASSIGN

    Parameters: type, variable name, assigned value.
    Returns the McStmtVar object, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssignedValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssignedValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2279
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_CONST

    Parameters: type, variable name, constant value.
    Returns the McStmtVar object, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConstValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sConstValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2287
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_EFLAGS

    Parameters: variable name.  The variable type is recorded as uint32_t.
    Returns the McStmtVar object, which is also appended to the block's aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2295
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The last character of the statement name gives the call argument count;
    two more parameters are expected in front of the arguments: the return
    code variable (index 0) and the function (index 1).

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 2);
    return McStmtCall(sName, asParams, 1, 0);
2302
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The last character of the statement name gives the call argument count;
    the function (index 0) is expected in front of the arguments.

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2309
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The last character of the statement name gives the call argument count;
    the function (index 0) is expected in front of the arguments.

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2316
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The last character of the statement name gives the call argument count;
    the function (index 0) is expected in front of the arguments.

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2323
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The last character of the statement name gives the call argument count;
    the function (index 0) is expected in front of the arguments.

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2330
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The last character of the statement name gives the call argument count;
    the function (index 0) is expected in front of the arguments.

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2337
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl that
    validates and merges a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sName is the statement name (for error messages) and sFlags the raw flag
    expression: '0' or flags separated by '|', optionally with comments and
    one level of enclosing parentheses.  Each known flag is set in
    dsCImplFlags together with the flags g_kdCImplFlags lists for it.  A '0'
    element is ignored.

    Returns None.  Raises a statement error (see raiseStmtError) on unknown
    flags, which includes empty elements such as the one produced by a
    trailing '|'.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Drop enclosing parentheses.  startswith/endswith are used rather than
            # indexing so that an empty element ends up as an 'Unknown flag' statement
            # error below instead of an IndexError here.
            if sFlag.startswith('('): sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):   sFlag = sFlag[:-1].strip();
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            # Set the flag itself as well as the ones it implies.
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2359
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The last character of the statement name gives the call argument count;
    three more parameters are expected in front of the arguments, the first
    being the IEM_CIMPL_F_XXX flags (merged into the block) and the third
    the function.

    Returns a McStmtCall object.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, 2);
2367
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The call argument count is the digit in front of the '_RET' suffix of
    the statement name; three more parameters are expected in front of the
    arguments, the first being the IEM_CIMPL_F_XXX flags (merged into the
    block) and the third the function.

    Returns a McStmtCall object.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    cCallArgs = int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, 2);
2377
@staticmethod
def stripComments(sCode):
    """
    Returns sCode with C and C++ comments removed.

    C++ comments are removed up to but not including the newline.  C comments
    are replaced by a single space, unless there already is whitespace right
    before or right after the comment.  For a comment that is not terminated
    (no newline / no '*/'), the code in front of it is returned with the
    trailing whitespace stripped.

    Note! String literals are not taken into account.
    """
    offCur = 0;
    while True:
        # Locate the next comment candidate; it needs at least one more character.
        offCur = sCode.find('/', offCur);
        if offCur < 0 or offCur + 1 >= len(sCode):
            return sCode;
        chNext = sCode[offCur + 1];

        if chNext == '/':
            # C++ comment - cut everything up to the end of the line.
            offEol = sCode.find('\n', offCur + 2);
            if offEol < 0:
                return sCode[:offCur].rstrip();
            sCode = sCode[:offCur] + sCode[offEol:];
            offCur += 1;    # Skip the newline.

        elif chNext == '*':
            # C comment - replace with a space if needed to keep the neighbours apart.
            offEndMarker = sCode.find('*/', offCur + 2);
            if offEndMarker < 0:
                return sCode[:offCur].rstrip();
            offAfter = offEndMarker + 2;
            fSpaceBefore = offCur > 0 and sCode[offCur - 1].isspace();
            fSpaceAfter  = offAfter < len(sCode) and sCode[offAfter].isspace();
            sSep = '' if fSpaceBefore or fSpaceAfter else ' ';
            sCode = sCode[:offCur] + sSep + sCode[offAfter:];
            offCur += len(sSep);

        else:
            # Just a slash, not a comment.
            offCur += 1;
2410
@staticmethod
def extractParam(sCode, offParam):
    """
    Extracts the parameter value starting at offParam in sCode.

    The parameter ends at the first ',' or ')' found outside any nested
    parentheses, or at the end of sCode if there is no such character.

    Returns the stripped value and the offset of the terminating ',' or ')'
    (the end offset if not terminated).
    """
    cDepth = 0;
    offEnd = max(offParam, len(sCode));
    for offCur in range(offParam, len(sCode)):
        ch = sCode[offCur];
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            if cDepth == 0:
                offEnd = offCur;
                break;
            cDepth -= 1;
        elif ch == ',' and cDepth == 0:
            offEnd = offCur;
            break;
    return (sCode[offParam : offEnd].strip(), offEnd);
2432
@staticmethod
def extractParams(sCode, offOpenParen):
    """
    Parses the parameter list starting with the opening parenthesis at
    offOpenParen in sCode.

    Returns the list of (stripped) parameter values and the offset of the
    closing parenthesis.
    Returns (None, len(sCode)) if no closing parenthesis was found.
    """
    assert sCode[offOpenParen] == '(';
    asParams = [];
    off = offOpenParen + 1;
    while off < len(sCode):
        ch = sCode[off];
        if ch.isspace():
            off += 1;
        elif ch != ')':
            (sParam, off) = McBlock.extractParam(sCode, off);
            asParams.append(sParam);
            # extractParam returns the end offset when it runs out of code before
            # finding a ',' or ')'.  That is the unterminated list case which is
            # reported via the (None, len(sCode)) return value, so no asserting or
            # indexing beyond the end of the string here.
            if off >= len(sCode):
                break;
            if sCode[off] == ',':
                off += 1;
        else:
            return (asParams, off);
    return (None, off);
2456
@staticmethod
def findClosingBraces(sCode, off, offStop):
    """
    Finds the '}' matching the '{' at off in sCode, not searching beyond
    offStop.

    Returns the offset of the matching '}' on success, otherwise -1.

    Note! Does not take comments into account.
    """
    cOpen     = 1;          # The '{' at off.
    offSearch = off + 1;
    while offSearch < offStop:
        offBrace = sCode.find('}', offSearch, offStop);
        if offBrace < 0:
            return -1;
        # Account for the braces opened since the previous '}' and for this '}' closing one.
        cOpen += sCode.count('{', offSearch, offBrace) - 1;
        if cOpen == 0:
            return offBrace;
        offSearch = offBrace + 1;
    return -1;
2477
@staticmethod
def countSpacesAt(sCode, off, offStop):
    """
    Returns the number of consecutive whitespace characters starting at off
    in sCode, not looking at or beyond offStop.
    """
    cchSpaces = 0;
    while off + cchSpaces < offStop and sCode[off + cchSpaces].isspace():
        cchSpaces += 1;
    return cchSpaces;
2485
@staticmethod
def skipSpacesAt(sCode, off, offStop):
    """
    Returns the first offset at or after off of a non-whitespace character
    in sCode, offStop being the limit.
    """
    while off < offStop and sCode[off].isspace():
        off += 1;
    return off;
2490
@staticmethod
def isSubstrAt(sStr, off, sSubStr):
    """ Returns True if sSubStr is found at offset off in sStr, otherwise False. """
    offEnd = off + len(sSubStr);
    return sStr[off : offEnd] == sSubStr;
2495
## Matches C/C++ control statement keywords: 'if (', 'else', 'while (', 'for (' and 'do'.
koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches references to a fixed set of 'iem.s.' members, capturing the member name.
koReIemDecoderVars = re.compile(r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                r')');

## The conditional statement kinds as tuples of if-prefix, else-name and endif-name.
## Entry 0 is the IEM_MC_IF_XXX family, entry 1 is IEM_MC_NATIVE_IF.
kaasConditions = (
    ( 'IEM_MC_IF_',       'IEM_MC_ELSE',        'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop].

    Parameters:
        sRawCode - The raw source text to decode.
        off      - Offset into sRawCode where decoding starts.
        offStop  - Offset where decoding stops, negative means the end of
                   sRawCode.
        iLevel   - The nesting level, incremented for each recursive call
                   decoding the branch of a conditional.

    Returns list of McStmt instances.
    Raises ParserException on failure.

    Single character look-aheads are done by slicing rather than indexing,
    so truncated input ends up in raiseDecodeError() instead of causing an
    IndexError.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            ch = sRawCode[off + 1 : off + 2]; # Empty if the '/' is the very last character.
            if ch == '/': # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break;
                off += 1;
            elif ch == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break;
                off += 2;
            else:
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
            # Extract it and strip comments from it.
            if   self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else:                                                           iCond = -1;
            if iCond < 0:
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # Back up over the whitespace separating the statement from the '{'.  Only actual
                # whitespace is dropped, so the ')' survives when the '{' follows it directly.
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler.
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0];
            oStmt = fnParser(self, sName, asParams);
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            #   IEM_MC_IF_XXX() {
            #       IEM_MC_WHATEVER();
            #   } IEM_MC_ELSE() {
            #       IEM_MC_WHATEVER();
            #   } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1 : offBlock1 + 1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off : off + 1] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off : off + 1] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s(' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2 : offBlock2 + 1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off : off + 1] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off : off + 1] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s(' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off : off + 1] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement.  if and else requires special handling.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                # Preprocessor statement, it ends with the line.
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;
            else:
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding.  This is a statement filter
            # criteria when generating the threaded functions blocks.
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                       );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1;

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1 : offBlock1 + 1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1;
                else:
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else') : off + len('else') + 1].isspace():
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2 : offBlock2 + 1] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1;
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2749
def decode(self):
    """
    Returns the decoded statement list of the block.

    As long as self.aoStmts is empty, the lines in self.asLines are joined
    and run thru decodeCode(), with the result being kept in self.aoStmts
    for subsequent calls.
    Raises ParserException on failure.
    """
    if self.aoStmts:
        return self.aoStmts;
    sRawCode = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sRawCode);
    return self.aoStmts;
2759
2760
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks if iEffSeg is used before the effective address has been decoded.
    Returns None on success, error string on failure.

    Only the top-level statements in aoStmts are examined, the branches of
    conditionals are not looked into as we're ASSUMING these will not occur
    before the address calculation.

    See r158454 for an example of this issue.
    """
    # Locate the first IEM_MC_CALC_RM_EFF_ADDR statement (-1 if there is none).
    idxCalc = -1;
    for idxCur, oCur in enumerate(aoStmts):
        if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
            idxCalc = idxCur;
            break;

    # Scan backwards from it for IEMCPU::iEffSeg references, so the offender
    # closest to the address calculation is the one being reported.
    for idxPrev in range(idxCalc - 1, -1, -1):
        for sArg in aoStmts[idxPrev].asParams:
            if 'pVCpu->iem.s.iEffSeg' in sArg:
                return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2782
# Extracts the first word of a C/C++ statement, provided it is followed by a space, '(' or ';'.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C/C++ statements which checkForDoneDecoding()
# accepts after the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(( 'IEMOP_HLP_IN_VMX_OPERATION', 'IEMOP_HLP_VMX_INSTR', ), True);
2788
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    macro invocation, that no decoding takes place after it and that no
    state modifying MC statement precedes it.
    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    sErrDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top-level statements are examined.
    cDone = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        #
        # C/C++ statements: Go by the first word.
        #
        if oCur.isCppStmt():
            oMatch = self.koReCppFirstWord.match(oCur.asParams[0]);
            if not oMatch:
                continue;
            sWord = oMatch.group(1);
            if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
                cDone += 1;
            elif cDone > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
                return sErrDecodeAfterDone % (idxStmt + 1,);
            continue;

        #
        # MC statements.
        #
        sName = oCur.sName;
        if sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and idxStmt == 0: # implicit
            cDone += 1;
        elif cDone == 0:
            # Nothing modifying state is allowed before decoding is done.
            if g_dMcStmtParsers.get(sName, (None, False))[1]:
                return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
        elif sName == 'IEM_MC_CALC_RM_EFF_ADDR':
            return sErrDecodeAfterDone % (idxStmt + 1,);

    if cDone == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDone == 1:
        return None;
    return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
2830
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.
    Returns None on success, error string on failure.

    Parameters:
        aoStmts         - The statements to check.  The branches of
                          McStmtCond statements are checked recursively.
        asRegRefClasses - Dictionary keyed by the register classes that have
                          been referenced so far.  It is updated by this
                          method, callers start out with an empty one.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                # A reference: Record the register class, i.e. the word following '_REF_'.
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # Nothing follows the class, so it is the whole remainder of the name.
                        # (Must be a slice; indexing would only yield the first character,
                        # which never matches the class extracted from a fetch below.)
                        sClass = oStmt.sName[offRef + 5 : ];
                asRegRefClasses[sClass] = True;
            else:
                # A register fetch: Complain if its class has already been referenced.
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = oStmt.sName[offFetch + 7 : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2886
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    Returns a list with the error strings of the failing checks, in the
    order the checks are performed.  The list is empty if all is fine.
    """
    aoStmts = self.decode();
    asResults = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    return [sRet for sRet in asResults if sRet];
2908
2909
## Temporary flag for enabling / disabling experimental MCs depending on the
## SIMD register allocator.  It is used in g_dMcStmtParsers as the native
## recompiler support indicator (last column) of the affected statements.
g_fNativeSimd = True;
2913
## IEM_MC_XXX -> parser + info dictionary.
#
# The info columns:
#   - col 1+0: boolean entry indicating whether the statement modifies state and
#              must not be used before IEMOP_HLP_DONE_*.
#   - col 1+1: boolean entry similar to the previous column, but used to decide
#              when to emit calls for conditional jumps (Jmp/NoJmp).
#              The difference is that most IEM_MC_IF_XXX entries are False here.
#   - col 1+2: boolean entry indicating native recompiler support.
#
# The raw table was generated via the following command
#       sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
#       | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
# pylint: disable=line-too-long
2928g_dMcStmtParsers = {
2929 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2930 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2931 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2932 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2933 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2934 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2935 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2936 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2937 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2938 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2939 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2940 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2941 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2942 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2943 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2944 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2945 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2946 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2947 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2948 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2949 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2950 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2951 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2952 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2953 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2954 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2955 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2956 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2957 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2958 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2959 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2960 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2961 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2962 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2963 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2964 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2965 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2966 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2967 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2968 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2969 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2970 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2971 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2972 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2973 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2974 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2975 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2976 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2977 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2978 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2979 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2980 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2981 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2982 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2983 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2984 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2985 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2986 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2987 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2988 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2989 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2990 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2991 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2992 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
2993 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
2994 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2995 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2996 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2997 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2998 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2999 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3000 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
3001 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3002 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3003 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3004 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
3005 'IEM_MC_COMMIT_EFLAGS_OPT': (McBlock.parseMcGeneric, True, True, True, ),
3006 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3007 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3008 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3009 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3010 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3011 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3012 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3013 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3014 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3015 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3016 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3017 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3018 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3019 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3020 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3021 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3022 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3023 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3024 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3025 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3026 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3027 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3028 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3029 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3030 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3031 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3032 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3033 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3034 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3035 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3036 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3037 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3038 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3039 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3045 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3046 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3047 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3048 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3049 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3050 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3051 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3052 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3053 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3054 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3055 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3056 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3057 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3058 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3059 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3060 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3061 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3062 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3063 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3064 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3065 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3066 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3067 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3068 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3069 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3070 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3071 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3072 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3073 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3074 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3075 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3076 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3077 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3079 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3080 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3081 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3082 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3083 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3084 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3085 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3086 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3087 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3088 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3089 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3090 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3091 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3092 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3093 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3094 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3095 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3096 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3097 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3098 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3099 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3100 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3101 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3102 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3103 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3104 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3105 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3106 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3107 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3108 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3109 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3110 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3111 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3112 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3113 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3114 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3115 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3116 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3117 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3118 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3119 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3120 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3121 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3122 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3123 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3124 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3125 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3126 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3127 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3128 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3129 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3130 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3131 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3132 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3133 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3134 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3135 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3136 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3137 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3138 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3139 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3140 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3141 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3142 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3143 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3144 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, True, ),
3145 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, g_fNativeSimd),
3146 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3147 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3148 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3149 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3150 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3151 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3152 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3153 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3154 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3155 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3156 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3157 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3158 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3159 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3160 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3161 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3162 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3163 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3164 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3165 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3166 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3167 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3168 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3169 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3170 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3171 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3172 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3173 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3174 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3175 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3176 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3177 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3178 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3179 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3181 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3183 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3184 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3188 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3196 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3197 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3198 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3199 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3200 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3201 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3202 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3203 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3204 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3205 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3206 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3207 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3208 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3209 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3214 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3215 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3216 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3217 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3218 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3219 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3220 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3221 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3222 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3223 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3224 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3225 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3226 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3227 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3228 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3229 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3230 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3231 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3232 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3233 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3234 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3235 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3236 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3237 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3238 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3239 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3240 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3241 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, True, ),
3242 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3243 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3244 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3245 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3246 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3247 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3248 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3249 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3250 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3251 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3252 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3253 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3254 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3255 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3256 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3257 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3258 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3259 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3260 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3261 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3262 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3263 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3264 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3265 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3266 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3267 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3268 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3269 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3270 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3271 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3272 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3273 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3274 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3275 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3276 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3277 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3278 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3279 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3280 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3281 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3282 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3283 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3284 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3285 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3286 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3287 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3288 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3289 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3290 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3291 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3292 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3293 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3294 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3295 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3296 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3297 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3298 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3299 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3300 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3301 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3302 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3303 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3304 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3311 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3312 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3313 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3314 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3315 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3316 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3317 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3318 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3319 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3320 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3321 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3322 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3323 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3324 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3325 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3326 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3327 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3328 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3331 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3332 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3334 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3335 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3336 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3337 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3338 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3339 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3340 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3341 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3342 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3343 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3344 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3345 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3346 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3347 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3348 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3349 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3350 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3351 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3352 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3353 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3354 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3355 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3356 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3357 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3358 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3359 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3360 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3361 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3362 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3363};
3364# pylint: enable=line-too-long
3365
## Module-level list of microcode blocks (McBlock instances); starts out empty.
g_aoMcBlocks = [] # type: List[McBlock]
3368
3369
3370
class ParserException(Exception):
    """ Exception type used for parser errors; carries the formatted message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3375
3376
3377class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3378 """
3379 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3380 """
3381
3382 ## @name Parser state.
3383 ## @{
3384 kiCode = 0;
3385 kiCommentMulti = 1;
3386 ## @}
3387
class Macro(object):
    """ A preprocessor macro (simple or with parameters) together with its expansion logic. """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body text that expandMacro works on.
        self.iLine  = iLine;    ##< The iLine value supplied by the caller.
        ## Matches a parameter name in the body, including any adjacent '##' paste operator;
        ## None when the macro has no parameters.
        if asArgs:
            sAlternatives = '|'.join(asArgs);
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + sAlternatives + r')(\s*##\s*|\b)');
        else:
            self.oReArgMatch = None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        if ch == '(':
            return False;
        return ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter
        (and be empty/None for a simple macro).  Returns the expanded body.
        """
        _ = oParent;

        oReArg = self.oReArgMatch;
        if not oReArg:
            # Simple macro: nothing to substitute.
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dValueByParam = dict(zip(self.asArgs, asArgs));

        # Substitute one occurrence at a time and resume searching right after the
        # inserted value, so that the value itself is never re-expanded.
        sExpanded = self.sBody;
        oHit      = oReArg.search(sExpanded);
        while oHit:
            offStart = oHit.start();
            offEnd   = oHit.end();
            sValue   = dValueByParam[oHit.group(2)];

            # Padding is only considered on a side without a '##' paste operator.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sExpanded[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);
            sPrefix    = ' ' if fPadBefore else '';
            sSuffix    = ' ' if fPadAfter else '';

            sExpanded = sExpanded[:offStart] + sPrefix + sValue + sSuffix + sExpanded[offEnd:];
            oHit      = oReArg.search(sExpanded, offStart + len(sValue));

        return sExpanded;
3429
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    Holds the directive type and expression of one conditional, validates the
    expression on construction, and offers helpers for deciding whether a
    given build architecture is covered by the conditional.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the architecture name handed to the script onto its RT_ARCH_XXXX define.
    # Every arch define listed in kdKnownDefines has an entry here so that
    # matchArch() does not fail with a KeyError for a supported architecture.
    kdBuildArchToIprt = {
        'amd64':   'RT_ARCH_AMD64',
        'arm32':   'RT_ARCH_ARM32',
        'arm64':   'RT_ARCH_ARM64',
        'sparc32': 'RT_ARCH_SPARC',
        'sparc64': 'RT_ARCH_SPARC64',
        'x86':     'RT_ARCH_X86',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive ('if', 'elif', or anything else for the
        ifdef/ifndef style where sExpr is a single define name).
        Raises Exception if the expression/define is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Flag; only its truthiness is tested by isInBlockForArch.
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr);

    @staticmethod
    def checkSupportedDefine(sDefine):
        """ Checks that sDefine is one that we support. Raises exception if unsupported. """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in SimpleParser.PreprocessorConditional.kdKnownDefines:
            return True;
        # Header guards are accepted without being listed individually.
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @staticmethod
    def checkExpression(sExpr):
        """
        Check that the expression is supported. Raises exception if not.

        Accepted: the literals '0' and '1', and defined(X) terms combined with
        '!', parentheses, '||' and '&&', optionally ending in a literal '1'.
        Returns True when the expression is acceptable.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
                if oMatch:
                    SimpleParser.PreprocessorConditional.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # Trailing literal '1': there is no match object to take the end from.
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @staticmethod
    def isArchIncludedInExpr(sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined(X) term matches (see matchDefined)
        or a trailing literal '1' is reached, False if no term matches.
        Raises Exception for anything that is not understood.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if SimpleParser.PreprocessorConditional.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            # The operator must be looked for at the current offset, not at the start of the string.
            if not sExpr.startswith('||', off):
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            off += 2;

            # Skip spaces after the operator so that a trailing literal '1' is recognized.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @staticmethod
    def matchArch(sDefine, sArch):
        """ Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++). """
        return SimpleParser.PreprocessorConditional.kdBuildArchToIprt[sArch] == sDefine;

    @staticmethod
    def matchDefined(sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not listed in kdKnownDefines are treated as undefined.
        Raises Exception for defines marked as unsupported for filtering (-2).
        """
        iDefine = SimpleParser.PreprocessorConditional.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and SimpleParser.PreprocessorConditional.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        Every conditional on the stack must select the branch we are currently
        in for sArch, otherwise False is returned.  iLine is unused (it is only
        referenced by the commented-out debug output).
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block is the one for sArch, so we must not have moved on to an elif/else.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # A matching elif only counts when it is the last one seen and we are not in the else.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                # Neither the primary block nor any elif matched: only the else branch is acceptable.
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3620
3621 def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
3622 self.sSrcFile = sSrcFile;
3623 self.asLines = asLines;
3624 self.iLine = 0;
3625 self.iState = self.kiCode;
3626 self.sComment = '';
3627 self.iCommentLine = 0;
3628 self.aoCurInstrs = [] # type: List[Instruction]
3629 self.oCurFunction = None # type: DecoderFunction
3630 self.iMcBlockInFunc = 0;
3631 self.oCurMcBlock = None # type: McBlock
3632 self.dMacros = {} # type: Dict[str, SimpleParser.Macro]
3633 self.oReMacros = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
3634 if oInheritMacrosFrom:
3635 self.dMacros = dict(oInheritMacrosFrom.dMacros);
3636 self.oReMacros = oInheritMacrosFrom.oReMacros;
3637 self.aoCppCondStack = [] # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
3638 self.sHostArch = sHostArch;
3639
3640 assert sDefaultMap in g_dInstructionMaps;
3641 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
3642
3643 self.cTotalInstr = 0;
3644 self.cTotalStubs = 0;
3645 self.cTotalTagged = 0;
3646 self.cTotalMcBlocks = 0;
3647
3648 self.oReMacroName = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
3649 self.oReMnemonic = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
3650 self.oReStatsName = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
3651 self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
3652 self.oReGroupName = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
3653 self.oReDisEnum = re.compile(r'^OP_[A-Z0-9_]+$');
3654 self.oReFunTable = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
3655 self.oReComment = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
3656 self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
3657 self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
3658 self.oReMcBeginEnd = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!
3659 self.fDebug = True;
3660 self.fDebugMc = False;
3661 self.fDebugPreproc = False;
3662
3663 self.dTagHandlers = {
3664 '@opbrief': self.parseTagOpBrief,
3665 '@opdesc': self.parseTagOpDesc,
3666 '@opmnemonic': self.parseTagOpMnemonic,
3667 '@op1': self.parseTagOpOperandN,
3668 '@op2': self.parseTagOpOperandN,
3669 '@op3': self.parseTagOpOperandN,
3670 '@op4': self.parseTagOpOperandN,
3671 '@oppfx': self.parseTagOpPfx,
3672 '@opmaps': self.parseTagOpMaps,
3673 '@opcode': self.parseTagOpcode,
3674 '@opcodesub': self.parseTagOpcodeSub,
3675 '@openc': self.parseTagOpEnc,
3676 #@opfltest: Lists all flags that will be used as input in some way.
3677 '@opfltest': self.parseTagOpEFlags,
3678 #@opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflcleared and @opflundef (if applicable).
3679 '@opflmodify': self.parseTagOpEFlags,
3680 #@opflclear: Lists all flags that will be set (set to 1).
3681 '@opflset': self.parseTagOpEFlags,
3682 #@opflclear: Lists all flags that will be cleared (set to 0).
3683 '@opflclear': self.parseTagOpEFlags,
3684 #@opflundef: List of flag documented as undefined.
3685 '@opflundef': self.parseTagOpEFlags,
3686 #@opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
3687 '@opflclass': self.parseTagOpEFlagsClass,
3688 '@ophints': self.parseTagOpHints,
3689 '@opdisenum': self.parseTagOpDisEnum,
3690 '@opmincpu': self.parseTagOpMinCpu,
3691 '@opcpuid': self.parseTagOpCpuId,
3692 '@opgroup': self.parseTagOpGroup,
3693 '@opunused': self.parseTagOpUnusedInvalid,
3694 '@opinvalid': self.parseTagOpUnusedInvalid,
3695 '@opinvlstyle': self.parseTagOpUnusedInvalid,
3696 '@optest': self.parseTagOpTest,
3697 '@optestign': self.parseTagOpTestIgnore,
3698 '@optestignore': self.parseTagOpTestIgnore,
3699 '@opcopytests': self.parseTagOpCopyTests,
3700 '@oponly': self.parseTagOpOnlyTest,
3701 '@oponlytest': self.parseTagOpOnlyTest,
3702 '@opxcpttype': self.parseTagOpXcptType,
3703 '@opstats': self.parseTagOpStats,
3704 '@opfunction': self.parseTagOpFunction,
3705 '@opdone': self.parseTagOpDone,
3706 };
3707 for i in range(48):
3708 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
3709 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
3710
3711 self.asErrors = [];
3712
def raiseError(self, sMessage):
    """
    Aborts by raising a ParserException.

    The exception text is sMessage prefixed with the current source file
    name (self.sSrcFile) and the current line number (self.iLine).
    """
    sText = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sText);
3718
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is the start of
    the current comment (self.iCommentLine) plus iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sText = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,);
    raise ParserException(sText);
3724
def error(self, sMessage):
    """
    Records an error at the current line (self.iLine) in self.asErrors.

    Returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3732
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number in self.asErrors.

    Returns False, so callers can write 'return self.errorOnLine(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3740
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error located inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.
    Returns False, so callers can write 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3748
def printErrors(self):
    """
    Writes all recorded errors (self.asErrors) to stderr in one go.

    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3757
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3764
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by
    a single space.

    If a comment opener or closer is still present afterwards (i.e. part of
    a multi-line comment), an error is recorded via self.error(); the
    stripped line is returned regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3775
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the table declaration line; the table name and, if present,
    the explicit array size are extracted from it.  The table body is read
    from self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the entry count
    matches the expected table length; otherwise an error is recorded and
    False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    # An explicit [256] or [32] array size selects the one-entry-per-byte layout.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps index i valid while inserting/deleting.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # One IEMOP_X4(fn) stands for four consecutive 'fn' entries:
                # three copies are inserted here, the fourth replaces slot i below.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i]; # Drop empty fields (e.g. after a trailing comma).

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                # End of table: check that we saw the expected number of entries.
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Find an instruction that already matches this slot, or one
                    # with unset opcode/prefix that can be completed from it.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No candidate fits: clone the first instruction for this
                        # opcode/prefix slot and register the copy everywhere.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3881
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for the current source file and registers it
    in both g_aoAllInstructions and self.aoCurInstrs.

    iLine is the creation line; when None the current line (self.iLine) is
    used.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3890
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string ('<mnemonic>_<optype>_<optype>...').

    Nothing is done when the instruction already has a mnemonic.  Operands
    are only derived when the instruction has none yet.

    Returns False if one of the words isn't a known operand type (operands
    derived before that word are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Unknown operand type: give up quietly (no error is recorded).
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3906
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing of one instruction by deriving missing attributes
    from the ones that were given and registering it in the lookup tables.

    iLine is recorded as the line where the instruction was completed.
    Always returns True; a duplicate stats name is recorded via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Neither tagged ("specified") instructions nor the legacy ones
    #       announced by '/** Opcode 0x0f 0x00 /0. */' style comments get any
    #       special treatment at this point.

    #
    # Common defaults.
    #

    # No mnemonic: guess it (and the operands) from the stats name, falling
    # back on the function name without its 'iemOp_' part.
    if oInstr.sMnemonic is None:
        sDeriveFrom = oInstr.sStats;
        if sDeriveFrom is None and oInstr.sFunction is not None:
            sDeriveFrom = oInstr.sFunction.replace('iemOp_', '');
        if sDeriveFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sDeriveFrom);

    # The disassembler op enum constant defaults to OP_<MNEMONIC>.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name defaults to the function name sans prefix,
    # or else to '<mnemonic>_<optype>...'.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = '_'.join([oInstr.sMnemonic,]
                                     + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The IEM function name defaults to 'iemOp_<mnemonic>_<optype>...', or
    # else to 'iemOp_<stats>'.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + '_'.join([oInstr.sMnemonic,]
                                                   + [oOperand.sType for oOperand in oInstr.aoOperands
                                                      if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            fModRm = bool(oInstr.fUnused and oInstr.sSubOpcode);
            if oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M' if fModRm else 'VEX.fixed';
            else:
                oInstr.sEncoding = 'ModR/M' if fModRm else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary. We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
4012
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions and resets the current instruction
    list and comment.

    The completion line is self.iLine, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  When fEndOfFunction is set, the current
    function (if any) is completed too and the per-function state is reset.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    if self.oCurFunction:
        # Hand the function its source lines, from its first line up to the current one.
        self.oCurFunction.complete(self.iLine, self.asLines[self.oCurFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
4033
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes that currently are
    None get replaced; any other existing value is left alone.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4045
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, padding the array with None as needed.

    Unless fOverwrite is exactly True, only entries that currently are None
    get replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
4057
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex byte, the remainder of that line (everything after 'Opcode', with
    any '0X' prefix normalized to '0x') is stored as the sRawOldOpcodes
    attribute of the current instructions, unless already set.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode bytes and modifiers.
        asOpcodes = ['0x' + sWord[2:] if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
4073
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (created at the tag's line) if there is no
    current one, bumps the op-tag count of every current instruction and
    counts instructions receiving their first tag in self.cTotalTagged.

    Returns the most recently added current instruction.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
4083
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section becomes one string made up of its stripped lines
    joined by single spaces; empty sections are dropped.
    Returns list of strings, one section per string.
    """
    asFlattened = [];
    for asSectionLines in aasSections:
        if not asSectionLines:
            continue;
        asStripped = [];
        for sLine in asSectionLines:
            asStripped.append(sLine.strip());
        asFlattened.append(' '.join(asStripped));
    return asFlattened;
4095
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into one string: the stripped lines of a section are
    joined by sLineSep, and each non-empty section other than the very
    first one is preceded by sSectionSep.  There is no trailing separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asSectionLines in enumerate(aasSections):
        if not asSectionLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asSectionLines]));
    return ''.join(asPieces);
4113
4114
4115
4116 ## @name Tag parsers
4117 ## @{
4118
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opbrief
    Value: Text description, multiple sections, appended.

    Brief description. If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing full stop is added if missing.  Returns True on success, and
    False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first full stop that ends a sentence, i.e. is followed by
    # a space or is the very last character.  It must be the last one.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4148
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opdesc
    Value: Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the instruction's
    description sections.  Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asNewSections = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asNewSections);
    return True;
4163
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opmnemonic
    Value: mnemonic

    The 'mnemonic' value must be a valid C identifier string. Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success, and False after recording an error for an
    invalid value or an attempt to overwrite an existing mnemonic.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to overwrite an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
4186
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags: @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding. See Instruction.kdOperandLocations for
    a list.

    The 'type' value indicates the operand type. These follow the types
    given in the opcode tables in the CPU reference manuals.
    See Instruction.kdOperandTypes for a list.

    When 'where' is omitted, the default location of the type is used.
    Returns True on success, and False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1; # The tag's final digit is the one-based operand number.
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4236
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opmaps
    Value: map[,map2]

    Indicates which maps the instruction is in. There is a default map
    associated with each input file.

    Returns True on success, and False after recording an error for an
    unknown map or a map the instruction is already assigned to.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMapNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asMapNames) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    sBadMap = next((sMap for sMap in asMapNames if sMap not in g_dInstructionMaps), None);
    if sBadMap is not None:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, sBadMap, ', '.join(g_dInstructionMaps.keys()),));

    # Reject maps that the instruction has already been assigned to.
    oDupMap = next((oMap for oMap in oInstr.aoMaps if oMap.sName in asMapNames), None);
    if oDupMap is not None:
        return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oDupMap.sName));

    # Add the maps, complaining about (but tolerating) duplicates in the input.
    for sMap in asMapNames:
        oNewMap = g_dInstructionMaps[sMap];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
4271
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @oppfx
    Value: n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction. (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    A two character value is taken as a hex byte lacking the '0x' prefix.
    'n/a' results in the prefix being set to None.

    Returns True on success, and False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # An empty value would otherwise raise IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            if sPrefix != '!0xf3':
                return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4310
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opcode
    Value: 0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Besides a plain opcode byte, the forms '/<reg>', '11/<reg>' and
    '!11/<reg>' are accepted, where <reg> is a single digit in the range
    0 thru 7 (the ModR/M reg field is three bits wide).

    Returns True on success, and False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    sRegDigits = '01234567'; # Valid values for the 3-bit ModR/M reg field.
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in sRegDigits:
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in sRegDigits:
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4340
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opcodesub
    Value: none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
           | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
           | !11 rex.w=0 | !11 mr/reg rex.w=0
           | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions. The more proper way would
    be to go via maps with two members, but this is faster.

    The value stored on the instruction is the first element of the
    matching g_kdSubOpcodes entry.  Returns True on success, and False
    after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: %s)'
                                           % (sTag, sRawValue, ', '.join(sorted(g_kdSubOpcodes.keys())),));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to overwrite an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));

    oInstr.sSubOpcode = sSubOpcode;
    return True;
4370
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @openc
    Value: ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  Accepted are the known
    encodings, instruction map names and anything _isValidOpcodeByte()
    approves of.

    Returns True on success, and False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnown and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to overwrite an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
4397
## EFlags tag to Instruction attribute name.
# Used by parseTagOpEFlags to pick the instruction attribute that a given
# @opfl* tag reads and updates.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
4406
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags: @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value: <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics, or the single
    word 'none' for an empty list.  The list is stored in the instruction
    attribute that self.kdOpFlagToAttr gives for the tag.

    Returns True on success, and False after recording an error for each
    invalid flag or for an attempt to overwrite a non-empty list.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                # Keep going so that every bad flag gets reported.
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Set them, refusing to overwrite a non-empty list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
4438
## EFLAGS class definitions with their attribute lists.
# Maps an @opflclass value to the flag lists that parseTagOpEFlagsClass
# assigns to the instruction attributes of the same names.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'incdec': {
        'asFlTest': [],
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear': [], # between IMUL and MUL.
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', 'of', ],
    },
    'bitmap': { # bt, bts, btr, btc
        'asFlTest': [],
        'asFlModify': [ 'cf', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest': [],
        'asFlModify': [],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags: @opflclass
    Value: arithmetic, logical, ...

    Shorthand that sets all the EFLAGS attributes of the instruction
    (asFlTest, asFlModify, asFlClear, asFlSet, asFlUndefined) from the
    self.kdEFlagsClasses entry named by the value.

    Returns True on success.  Returns False after recording an error if
    the class is unknown or if any of the attributes has already been set
    (is not None); in the latter case the instruction is left untouched.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s (valid: %s)'
                                           % (sTag, sClass, ', '.join(sorted(self.kdEFlagsClasses.keys())),));

    # Check all the attributes before touching any of them, so a conflict
    # doesn't leave the instruction half updated.
    for sAttrib, asFlags in kdAttribs.items():
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));

    # Set the attributes.  Each instruction gets its own copy of the lists
    # so that modifying one instruction cannot corrupt the shared class
    # table or other instructions of the same class.
    for sAttrib, asFlags in kdAttribs.items():
        setattr(oInstr, sAttrib, list(asFlags));

    _ = iEndLine;
    return True;
4565
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @ophints
    Value: Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The single word 'none' gives an empty list.  Valid hints are added as
    keys to the instruction's dHints dictionary.

    Returns False after recording an error for each invalid hint.
    Otherwise returns True; duplicate hints are reported as errors but do
    not affect the return value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Store the stripped hint in the list, not in the string.
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4599
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.
    The first word of the value must match self.oReDisEnum and the value may
    only be set once per instruction.  Returns True on success, False when
    the value is empty, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected; with several we complain but carry on
    # using the first one.
    asValues = self.flattenAllSections(aasSections).split();
    cValues  = len(asValues);
    if cValues != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
        if cValues == 0:
            return False;

    sEnumValue = asValues[0];
    if self.oReDisEnum.match(sEnumValue) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sEnumValue, self.oReDisEnum.pattern));

    # Only the first assignment is accepted.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sEnumValue;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sEnumValue,));
4628
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames.  An empty
    value or an unknown name is reported and the errorComment() result is
    returned.  Extra words and an attempt to replace an already set,
    different CPU name are reported too, but the method still returns True
    (the previously set name is kept in the latter case).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Indexing an empty list below would raise IndexError.
        return self.errorComment(iTagLine, '%s: expected a CPU name, found nothing' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not happen during validation above, or
    # the overwrite detection here could never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4658
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    A lone 'none' (any case) means an empty list.  Every other word must be
    a key of g_kdCpuIdFlags; unknown words are reported through
    errorComment() and, when that yields a false value, the method returns
    False without adding anything.  Values already present on the
    instruction are reported as duplicates but do not fail the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Store the normalized value in the list.  The old code indexed
                    # the string itself, which raises TypeError (str is immutable).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4692
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one word matching self.oReGroupName is
    accepted, and the group may only be set once per instruction.  Returns
    True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There has to be exactly one word and it has to look like a group name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    sName = asNames[0];
    if self.oReGroupName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sName, self.oReGroupName.pattern));

    # Refuse to replace an earlier assignment.
    if oInstr.sGroup is None:
        oInstr.sGroup = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sName,));
4718
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The single value must be a key of g_kdInvalidStyles and only one of the
    three tags may be given per instruction.  Returns True on success,
    otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One word only, and it must be a known style.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));
    sNewStyle = asValues[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # The style is shared by all three tags, so it can only be set once.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction itself.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    return True;
4756
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of aasSections is parsed as one test.  A test that parses
    cleanly is appended to the instruction's aoTests; problems are reported
    through errorComment() and the offending test is dropped.  Always
    returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->'), then selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list must be compared by
            # identity: comparing by value cannot tell two equal lists (e.g.
            # two empty ones) apart and would miss repeated switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                   if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Only keep tests that parsed without any failure.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4901
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is checked against the count of tests the instruction already
    has (a mismatch is reported, nothing more), after which the tag is
    handed to parseTagOpTest() whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Cut the number out of the tag, with or without the square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4917
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    A simple trick for ignoring one test while debugging another: all the
    arguments are discarded and True is returned.

    See also @oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4929
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    The references are only recorded in the instruction's asCopyTests here.
    Each one must match self.oReStatsName or self.oReFunctionName; invalid
    and duplicate references are reported and skipped.  Returns True unless
    no reference was given at all.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asRefs = self.flattenAllSections(aasSections).split();
    if len(asRefs) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4958
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is in
    there already.  A non-empty value is reported and the errorComment()
    result returned; otherwise True is returned.

    See also @optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag is a pure marker and takes no value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);

    return True;
4981
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The single value must be a key of g_kdXcptTypes and may only be set once
    per instruction.  Returns True on success, otherwise the errorComment()
    result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One word only, and it must be a known exception type.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));
    sNewType = asValues[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an earlier assignment.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
5008
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It it thought it maybe necessary to set it when specifying instructions
    which implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReFunctionName and may only be set once per
    instruction.  Returns True on success, otherwise the errorComment()
    result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Refuse to replace an earlier assignment.
    if oInstr.sFunction is None:
        oInstr.sFunction = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sNewName,));
5036
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It it thought it maybe necessary to set it when specifying instructions
    which implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReStatsName and may only be set once per
    instruction.  Returns True on success, otherwise the errorComment()
    result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReStatsName.pattern));

    # Refuse to replace an earlier assignment.
    if oInstr.sStats is None:
        oInstr.sStats = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sNewName,));
5064
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.
    Returns the doneInstructions() result, or the errorComment() result if
    the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5077
5078 ## @}
5079
5080
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away
    (returns False).  Otherwise the comment is split into lines, cleaned of
    leading asterisks and blanks, and cut up into tags.  Text before the
    first tag belongs to the implicit '@default' tag.  Each tag's text is
    grouped into sections separated by empty lines and passed to the handler
    registered in self.dTagHandlers.  Returns True once the comment has been
    processed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Leading empty lines are dropped too (advancing self.iCommentLine for
    # each), as are trailing ones.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and asLines[0] == '':
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and asLines[-1] == '':
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so process the previous one first.
            # A trailing empty section is left out.
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop();
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Set up the new tag; any text following it on the same line
            # becomes the first line of its first section.
            #
            asParts      = sLine.split(None, 1);
            sCurTag      = asParts[0].lower();
            asCurSection = asParts[1:];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line closes a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop();
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5181
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).

    When the closing parenthesis isn't found in sInvocation, more text is
    pulled in from self.asLines starting at index self.iLine.  Running out
    of lines is reported via self.error() and what was collected so far is
    returned.
    """
    # The macro name is whatever precedes the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Walk the argument list one character at a time, keeping track of the
    # parenthesis nesting and of string/character literals.
    iNextLine   = self.iLine;
    offLineBase = 0;                # Offset in sInvocation where the most recently appended line starts.
    offArg      = offParen + 1;     # Start of the argument currently being collected.
    offCur      = offArg;
    cNesting    = 1;
    chInQuote   = None;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Need more input.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
            offLineBase  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote is not None:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;        # Skip the escaped character.
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            # Only separators at the outermost level delimit arguments.
            if cNesting == 1:
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
5238
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if the next non-blank character is
    an opening parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Use startswith() rather than indexing: nothing but whitespace may
    # follow the name, and indexing the empty string raises IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5248
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Returns None if not found, arguments as per parseMacroInvocation if found.

    This is the first element of the findAndParseMacroInvocationEx() result.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[0];
    return asArgs;
5254
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and returns the
    findAndParseMacroInvocation() result for the first one that is found,
    or None if none of them are.
    """
    oCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oCandidates if asRet is not None), None);
5264
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty):
        sMacro     - The macro name, only used in messages.
        sStats     - The a_Stats value, must match self.oReStatsName.
        sAsm       - Not used.
        sForm      - The a_Form value, a key of g_kdIemForms.
        sUpper     - The a_Upper value (upper cased mnemonic).
        sLower     - The a_Lower value (lower cased mnemonic).
        sDisHints  - '|' separated DISOPTYPE_XXX values or '0'.
        sIemHints  - '|' separated IEMOPHINT_XXX values or '0'.
        asOperands - The operand types, keys of g_kdOpTypes.

    Problems are reported via self.error().  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note: str.find() returns -1 (which is true in a boolean
                    #       context) when there is no match and 0 for a match
                    #       at the start, so the VEX/XOP tests must compare
                    #       against zero explicitly.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not (sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub.  This must stay within the known-form branch as
        # it indexes g_kdIemForms with sForm.
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5404
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are constructed from the lower cased mnemonic and the operands
    before handing the job over to workerIemOpMnemonicEx().
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5414
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function and makes it the current
    block (self.oCurMcBlock).  sCode is the code text containing the
    statement, offBeginStatementInCodeStr the offset of the statement within
    that text (used for working out the indentation), and
    offBeginStatementInLine the offset handed to the McBlock constructor.

    Returns True.  A missing current function or an already open block is
    passed to self.raiseError().
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    # (The indent is the distance from the preceding newline in sCode, or from the start of
    # sCode if there is none.)
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    # But don't add it to the list unless the context matches the host architecture.
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                               cchIndent = cchIndent);
    # NOTE(review): the nesting of the two statements below the condition was reconstructed,
    #               as the indentation isn't visible in the reviewed text - confirm that both
    #               are conditional.
    try:
        if (   not self.aoCppCondStack
            or not self.sHostArch
            or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine)):
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        # Hand the message of the exception on to our own error reporting.
        self.raiseError(oXcpt.args[0]);

    # The block is always linked to its instruction (if any) and counted for the function.
    if self.oCurMcBlock.oInstruction:
        self.oCurMcBlock.oInstruction.aoMcBlocks.append(self.oCurMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5454
5455 @staticmethod
5456 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
5457 """
5458 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
5459 extracting a statement block from a string that's the result of macro
5460 expansion and therefore contains multiple "sub-lines" as it were.
5461
5462 Returns list of lines covering offBegin thru offEnd in sRawLine.
5463 """
5464
5465 off = sRawLine.find('\n', offEnd);
5466 if off > 0:
5467 sRawLine = sRawLine[:off + 1];
5468
5469 off = sRawLine.rfind('\n', 0, offBegin) + 1;
5470 sRawLine = sRawLine[off:];
5471 if not sRawLine.strip().startswith(sBeginStmt):
5472 sRawLine = sRawLine[offBegin - off:]
5473
5474 return [sLine + '\n' for sLine in sRawLine.split('\n')];
5475
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Gathers the source lines making up the current MC block, strips
    whatever follows the 'IEM_MC_END();' statement on the final line, and
    passes the result to the block's complete() method before clearing
    self.oCurMcBlock.

    offEndStatementInLine is the offset of the IEM_MC_END statement within
    the current (possibly macro expanded) line.

    Returns True.  Malformed input is reported via raiseError; this
    includes an IEM_MC_END statement that is cut short (missing '(', ')'
    or ';'), which used to surface as an IndexError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so rebase it to the joined string.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                  offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines and asLines[-1].strip() == '':
        asLines.pop();
    if not asLines:
        self.raiseError('bogus IEM_MC_END: No non-blank lines in block');
    sFinal = asLines[-1];
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd = offEndInFinal + len('IEM_MC_END');

    # Match the '(', ')' and ';' following the macro name, allowing blanks in
    # front of each.  The scan is bounds checked so that a truncated statement
    # produces a proper diagnostic rather than an IndexError.
    for chExpected in '();':
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5554
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    Unlike IEM_MC_BEGIN/IEM_MC_END pairs, the whole MC block consists of this
    single statement, so the block is created, parsed and completed right
    here without touching self.oCurMcBlock.

    sCode is the code snippet being scanned, offBeginStatementInCodeStr the
    offset of the statement within it, offBeginStatementInLine the offset
    within the source line, and cParams the digit from the macro name.

    Returns True.  Problems are reported via raiseError and asserts.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions: Must be inside a function, but not inside an IEM_MC_BEGIN block.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    # (I.e. the distance from the newline preceding the statement, if there is one in sCode.)
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.  We get the argument list (asArgs[0] is handed on as
    # the statement name below), the offset following the invocation, and the
    # number of additional lines it spans.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    # The block consists of this one statement only.
    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        # Regular source line(s): Take the lines spanned and cut the last one after the semicolon.
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        # Macro expanded line with embedded newlines: Extract the relevant sub-lines
        # and cut the last one after the first semicolon.
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.  The end statement offset is zero when the statement spans multiple lines.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    # Register the block globally and with its instruction (if any), then update the counters.
    # NOTE(review): unlike workerIemMcBegin, no host architecture filtering is done here - confirm that is intended.
    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5622
def workerStartFunction(self, asArgs):
    """
    Called when a new decoder function definition is encountered.

    Decoder functions are defined using the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed invocation, where entry 0
    is the macro name and entry 1 is used as the function name.

    Any function still open is completed first, ending on the line before
    the current one.

    Returns True.
    """
    # Wrap up the previous function, if any.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Start tracking the new one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5638
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is a piece of code (comments already cut away by the caller) and
    offLine is the offset of sCode within the current line, which is added
    to the IEM_MC_ statement offsets handed to the workers.

    The following is looked for, in this order:
        - Decoder function definitions (FNIEMOP_DEF, FNIEMOP_STUB, ...).
        - Worker function definitions (FNIEMOP_DEF_1, FNIEMOP_DEF_2).
        - IEMOP_HLP_DONE_VEX_DECODING* (for checking the vex_l_zero hint).
        - IEMOP_MNEMONIC* in its many variants.
        - IEM_MC_BEGIN, IEM_MC_END and IEM_MC_DEFER_TO_CIMPL_x_RET.

    Returns True if a function definition or IEM_MC_ statement was found,
    otherwise False (this includes code with only IEMOP_MNEMONIC* macros).
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to associate the function with,
            # and complain if any of them has already seen a FNIEMOP_XXX macro.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # Stubs have no body, so the instruction(s) are done at this point.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        # The L0 variants imply the vex_l_zero hint, so complain if the mnemonic
        # macro didn't specify it and then set it regardless.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # In the calls below asArgs[0] is passed as the macro name, the operands
        # are collected into a list, and the a_fDisHints + a_fIemHints values
        # following the operands are passed as the two hint arguments.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            # Only used to fill in missing bits when there is exactly one current instruction.
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # Anything else is treated as DEFER_TO_CIMPL_<n>..., the parameter
                    # count being the single digit following that prefix.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5782
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros so it matches the macros currently in
    self.dMacros.  To be called whenever self.dMacros changes.

    The expression is one capture group with an alternative per macro.
    Macros taking arguments must be followed by an opening parenthesis
    (which becomes part of the match), whereas simple macros must end on
    a word boundary.

    Returns True.  self.oReMacros is set to None if there are no macros.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        sSuffix = r'\b' if oMacro.asArgs is None else r'\s*\(';
        asAlternatives.append(sName + sSuffix);
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5803
def workerPreprocessorDefine(self, sRest):
    """
    Deals with a #define directive.  sRest is what follows the directive
    word on the line, including the terminating newline.

    Only macros whose body mentions IEM_MC_BEGIN or IEM_MC_END are recorded
    in self.dMacros; all others are silently ignored.

    Returns True on success (including ignored macros), or the result of
    self.error() if the definition could not be parsed.
    """
    assert sRest[-1] == '\n';

    #
    # Join up any continuation lines.  The newlines are kept, while the
    # escaping backslashes and any blanks in front of them are dropped.
    #
    iFirstLine = self.iLine;
    while self.iLine < len(self.asLines) and sRest.endswith('\\\n'):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split out the name, parameter list and body.  First try the expression
    # for macros with a parameter list, falling back on the simple macro one.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sParams = oMatch.group(2).strip();
        asArgs = None;
        if sParams:
            asArgs = [sParam.strip() for sParam in sParams.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # We only care about macros containing (parts of) MC blocks.  MC blocks
    # within nested macro expansions are NOT supported, as that would be a
    # lot of extra work, and there is only limited support for macros that
    # expand to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and
    #       its siblings in the recompiler.  That is a lot simpler than nested
    #       macro expansion with heuristics for locating the relevant macros,
    #       and it avoids producing lots of unnecessary threaded functions.
    #
    if not (sBody.find("IEM_MC_BEGIN") >= 0 or sBody.find("IEM_MC_END") >= 0):
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the macro matching expression.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreprocessorRecreateMacroRegex();
5864
def workerPreprocessorUndef(self, sRest):
    """
    Deals with an #undef directive.  sRest is what follows the directive
    word on the line.

    Returns True if the macro isn't one we are tracking, otherwise the
    result of workerPreprocessorRecreateMacroRegex.
    """
    # Crude comment stripping: drop everything from the first slash, provided it isn't the very first character.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is a macro we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5883
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Deals with the #if, #ifdef, #ifndef and #elif directives.

    sDirective is the directive word (sans hash) and sRest is what follows
    it on the line.

    An #elif is added to the aoElif list of the innermost conditional,
    while the others open a new level on self.aoCppCondStack.

    Returns True.  Problems are reported via raiseError.
    """
    fElif = sDirective == 'elif';

    # An #elif needs an open #if that hasn't had its #else yet.
    if fElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    # Join up any continuation lines, replacing the escape + newline (and
    # any blanks in front of them) with a single space.
    while self.iLine < len(self.asLines) and sRest.endswith('\\\n'):
        sRest = sRest[:-2].rstrip() + ' ' + self.asLines[self.iLine];
        self.iLine += 1;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    # Create the conditional, turning any trouble into a parser error.
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # Stash it.
    aoTarget = self.aoCppCondStack[-1].aoElif if fElif else self.aoCppCondStack;
    aoTarget.append(oCond);
    return True;
5922
def workerPreprocessorElse(self):
    """
    Deals with an #else directive by flagging the innermost conditional
    on self.aoCppCondStack as being in its else-part.

    Returns True.  A stray or duplicate #else is reported via raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oInnermost = self.aoCppCondStack[-1];
    if oInnermost.fInElse:
        self.raiseError('Another #else after #else');
    oInnermost.fInElse = True;
    return True;
5934
def workerPreprocessorEndif(self):
    """
    Deals with an #endif directive by closing the innermost conditional,
    i.e. popping it off self.aoCppCondStack.

    Returns True.  A stray #endif is reported via raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');
    self.aoCppCondStack.pop();
    return True;
5944
def checkPreprocessorDirective(self, sLine):
    """
    Deals with a line holding a preprocessor directive.

    The directive word is isolated and the line dispatched to the
    matching workerPreprocessorXxxx method.

    Returns the worker's return value, or False if the directive isn't
    one we handle.
    """
    cchLine = len(sLine);

    # Locate the first character of the directive word, i.e. skip the hash and any blanks after it.
    offHash = sLine.find('#');
    assert offHash >= 0;
    offDirective = offHash + 1;
    while offDirective < cchLine and sLine[offDirective].isspace():
        offDirective += 1;

    # The directive word runs up to the next blank.
    offTail = offDirective;
    while offTail < cchLine and not sLine[offTail].isspace():
        offTail += 1;
    sDirective = sLine[offDirective:offTail];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Skip blanks following the directive.  This looks one character ahead, so
    # the tail retains the last blank in front of the arguments, or the
    # newline if there aren't any.
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Dispatch.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5984
def expandMacros(self, sLine, oMatch):
    """
    Expands a known macro in the given line.

    oMatch is the self.oReMacros match locating the macro invocation in
    sLine.  We currently ASSUME there is only one invocation in the line.

    Returns the line with the invocation replaced by the expansion.
    Invocations spanning multiple lines and argument count mismatches are
    reported via raiseError.
    """
    #
    # Figure out which macro it is.  When the macro takes arguments, the
    # match includes the opening parenthesis.
    #
    offMatch      = oMatch.start();
    offAfterMatch = oMatch.end();
    sMatched      = oMatch.group(1);
    assert sMatched == sLine[offMatch : offAfterMatch];
    fWithArgs = sMatched.endswith('(');
    sName     = sMatched[:-1].strip() if fWithArgs else sMatched;
    oMacro    = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro w/o parameters: Just substitute the name.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offAfterMatch:];

    #
    # Macro with parameters: Collect the arguments by scanning for the
    # closing parenthesis, splitting on commas at the outermost level only.
    # ASSUMES the whole invocation is on the same line!
    #
    asArgs = [];
    cDepth = 1;
    offArg = offAfterMatch;
    offCur = offAfterMatch;
    while True:
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offCur];
        if ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                break;
        elif ch == '(':
            cDepth += 1;
        elif ch == ',' and cDepth == 1:
            asArgs.append(sLine[offArg:offCur].strip());
            offArg = offCur + 1;
        offCur += 1;
    asArgs.append(sLine[offArg:offCur].strip()); # the final argument; offCur is at the closing parenthesis.

    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Replace the invocation with the expansion.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
6043
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are processed one by one, keeping track of
    whether we're in code or inside a multi-line comment (self.iState).
    Code is checked for preprocessor directives and relevant macro
    invocations, while comment text is accumulated in self.sComment and
    handed to parseComment when the comment ends.

    Returns number of errors (the printErrors result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1; # From here on self.iLine is the one-based number of the current line.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded text replaces the original line in self.asLines.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                # Do the full scan unless the first slash starts a C++ comment while we're in code.
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                offHit = sLine.find('/*', offEnd);

                            if offHit >= 0:
                                # Check the code preceding the comment, then switch to comment mode.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment = '';
                                self.iCommentLine = self.iLine;
                                self.iState = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                # No (substantial) comment, the rest of the line is code.
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: Parse it and switch back to code mode.
                                self.sComment += sLine[offLine:offHit];
                                self.iState = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                # The comment continues on the next line.
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.  Only the code preceding it is checked.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif ( self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # Wrap up any pending instructions and print a brief summary for the file.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6149
# Some sanity checking.
def __sanityCheckEFlagsClasses():
    """
    Asserts that every flag listed in SimpleParser.kdEFlagsClasses is
    present in g_kdEFlagsMnemonics.
    """
    dClasses = SimpleParser.kdEFlagsClasses;
    for sClass in dClasses:
        dLists = dClasses[sClass];
        for sAttrib in dLists:
            for sFlag in dLists[sAttrib]:
                fKnown = sFlag in g_kdEFlagsMnemonics;
                assert fKnown, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);
__sanityCheckEFlagsClasses();
6157
## The parsed content of IEMAllInstCommonBodyMacros.h.
# This is None until the first __parseFileByName call, which parses the
# header and then passes the resulting parser on to the SimpleParser
# instance created for each source file.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6160
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed as well and
    the result cached in g_oParsedCommonBodyMacros, so the macros in it can
    be used when processing this and subsequent files.  The header is
    looked for next to sSrcFile first, then next to this script.

    sSrcFile is the path of the file to parse, while sDefaultMap and
    sHostArch are passed on to the SimpleParser constructor.

    Returns a tuple with the parse() result (error count) and the parser.
    Raises Exception if a file cannot be opened or read.  ParserException
    is printed to stderr before being re-raised; it is also raised if the
    common body macros header has errors or unexpectedly contains
    instructions, tags, stubs or MC blocks.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The header is only supposed to define macros, so anything else found in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The value order must match the labels in the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6222
6223
def __doTestCopying():
    """
    Carries out the test copying requested by the asCopyTests lists.

    Each name in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.  The aoTests of every instruction found are
    appended to the aoTests of the requesting instruction.

    Returns the number of errors (names that were not found or that resolve to
    the requesting instruction itself).  The messages are written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # The by-stat lookup yields a single instruction, the by-function one a list.
            oStatMatch = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oStatMatch:
                aoSrcInstrs = [oStatMatch,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6251
6252
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, the aoTests of every instruction not in it is
    replaced by an empty list.  Nothing is changed when the list is empty.

    Returns 0 (there are no errors to report).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6264
## List of all main instruction files, their default maps and file sets
## (a file set of -1 means the file is included in all sets).
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    # Filename,                     default map, file set
    ('IEMAllInstCommon.cpp.h',      'one',       -1),
    ('IEMAllInstOneByte.cpp.h',     'one',        1),
    ('IEMAllInst3DNow.cpp.h',       '3dnow',      2),
    ('IEMAllInstTwoByte0f.cpp.h',   'two0f',      2),
    ('IEMAllInstThree0f38.cpp.h',   'three0f38',  3),
    ('IEMAllInstThree0f3a.cpp.h',   'three0f3a',  3),
    ('IEMAllInstVexMap1.cpp.h',     'vexmap1',    4),
    ('IEMAllInstVexMap2.cpp.h',     'vexmap2',    4),
    ('IEMAllInstVexMap3.cpp.h',     'vexmap3',    4),
);
6277
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInst*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs.
    A bare filename (no directory part) that does not exist relative to the
    current directory is looked for in the directory of this script instead.
    sHostArch is passed on unmodified to __parseFileByName.

    Once all files are parsed, the test copying and only-test filtering are
    applied and a stub statistics line is printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Use a divisor of at least one so that a run which found no instructions
    # at all still prints the statistics (and reports any parse errors below)
    # rather than dying on a ZeroDivisionError.
    cInstructions = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cInstructions, 1), cTotalStubs, cInstructions, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6308
6309
def parseFiles(asFiles, sHostArch = None):
    """
    Parses the given selection of IEMAllInst*.cpp.h files.

    The default map for each file is found by matching its base name (case
    is ignored) against the entries of g_aaoAllInstrFilesAndDefaultMapAndSet.
    The actual work is then done by __parseFilesWorker.

    Returns a list of the parsers on success.
    Raises exception on failure, including for files that cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sName = os.path.basename(sFilename).lower();
        # The first table entry with a matching filename supplies the default map.
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6331
6332
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the filename and the default map, not the file set.
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append(aoInfo[:2]);
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6341
6342
6343#
6344# Generators (may perhaps move later).
6345#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler opcode table entry for the given instruction.

    The entry is an OP() macro invocation, or an OPVEX() one when the
    instruction has more than three operands, with the macro arguments padded
    out to fixed column offsets.

    Returns the formatted line (string).
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert cOperands <= cMaxOperands;

    #
    # Format string: the mnemonic followed by a comma separated operand list.
    #
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':                   ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();      ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.  The encoding may supply the leading parser column(s).
    #
    sEncoding = oInstr.sEncoding;
    asParsers = [];
    if sEncoding is None or sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert cOperands >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding in [ 'prefix', ]:
        asParsers.extend(['0,'] * cOperands);
    elif sEncoding == 'VEX.ModR/M':
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'vex2':
        asParsers.append('IDX_ParseVex2b,');
    elif sEncoding == 'vex3':
        asParsers.append('IDX_ParseVex3b,');
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parsers not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #   IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
        #   IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #   IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
        #   IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the operands not covered by the columns
    # added above, then pad the parser columns out to the macro's operand count.
    for oOperand in oInstr.aoOperands[len(asParsers):]:
        asParsers.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags: the DISOPTYPE_ defines of the hints, or'ed together in sorted hint order.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    sFlags = (' | '.join(asFlags) if asFlags else '0') + '),';

    #
    # Format the columns into a line.  A column starts at its fixed offset,
    # or one space after the previous column when that ran past the offset.
    #
    asColumns   = [ sFormat, ] + asParsers + [ oInstr.sDisEnum + ',', ] + asParams + [ sFlags, ];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # Example of an entry and the macro consuming it:
    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #    DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #     { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6470
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether the table should be emitted in shortened form, that is
    without its leading and trailing None entries.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to emit and cInstructions the index following the last one.
    For the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Determine how much can be trimmed off the end...
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # ... and off the start.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Too little to gain: output the full table.
    # NOTE(review): the threshold is len // 30, i.e. about 3.3% of the table
    # (zero for tables with fewer than 30 entries), whereas the comment this
    # replaces spoke of saving "more than 30%" - confirm which one is intended.
    cTrimmed = iFirst + cEntries - iEnd;
    if cTrimmed < cEntries // 30:
        _ = oMap; # Use this for overriding.
        return (0, cEntries, False);

    return (iFirst, iEnd, True);
6492
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes, for each map whose name
    starts with 'vex' (the others are skipped for now), the opcode table and
    the map range record to oDstFile.

    Returns exit code: 0 on success, 1 if the parsing failed (the error and
    a traceback are then printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by opcode byte + prefix are split up into one map per
    # prefix variation (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oMapToSort: oMapToSort.sEncoding + ''.join(oMapToSort.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the matching extern declarations, but
    #       nothing in this function writes them out yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # Table entries covering 16 opcode byte values.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets its start index noted.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries; the final entry is stepped over below.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip exactly
                        # the entries covered by the block macro rather than a
                        # hard-coded three.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this passes an empty list to the formatter, which
                    # expects an instruction - presumably never happens; confirm.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An array needs at least one element.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must use the very same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines, followed by an empty line separating the maps.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6621
# When run as a script, generate the disassembler tables on stdout and use the
# result as the process exit code.
if __name__ == '__main__':
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
6624
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette