VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103814

Last change on this file since 103814 was 103814, checked in by vboxsync, 9 months ago

VMM/IEM: Implement native emitters for IEM_MC_STORE_XREG_U32_ZX_U128() and IEM_MC_STORE_XREG_U64_ZX_U128(), bugref:10614

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 321.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103814 2024-03-13 07:20:44Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103814 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: there is no separate 'long' type there, so alias it to 'int'.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constant names and their values (expressed as bit shifts).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      3 << 12,       # Two bit field: bits 12 and 13.
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,    # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to an X86_EFL_XXX constant name.  A leading
## '!' in the value marks the mnemonic that stands for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): 'po' maps to PF set and 'pe' to PF clear here, while x86
    #               sets PF on even parity - confirm this is intended.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## CR0 bit names and their values (expressed as bit shifts).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
152
## CR4 bit names and their values (expressed as bit shifts).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
174
## XSAVE component bits (XCR0), single bits expressed as shifts.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     0x000000c4,  # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6,  # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
##
## Every location name is associated with its own (initially empty) list.
g_kdOpLocations = dict((sLocation, []) for sLocation in (
    'reg',      ## modrm.reg
    'rm',       ## modrm.rm
    'imm',      ## immediate instruction data
    'vvvv',     ## VEX.vvvv

    # fixed registers.
    'AL',
    'rAX',
    'rDX',
    'CL',
    'rSI',
    'rDI',
    'rFLAGS',
    'CS',
    'DS',
    'ES',
    'FS',
    'GS',
    'SS',

    # fixed values.
    '1',
));
218
## \@op[1-4] types
##
## Maps an operand type name to a five entry tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Most names carrying a _WO/_RO/_RW suffix have the same tuple as the
## unsuffixed name; the suffix presumably records the access mode
## (write-only/read-only/read-write) - TODO confirm.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    # The flags register.
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',     'cl',   'REG_CL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),

    # Fixed values.
    '1':            ( '',                   '1',      '1',    '1',       '',      ),
};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
## IEMFORM_XXX mappings.
##
## Maps the form name to a tuple of:
##    - 0: the encoding name (sEncoding).
##    - 1: list of operand locations in operand order, or None ('FIXED').
##    - 2: the opcodesub value ('' when the form does not imply one).
##
## NOTE(review): M_REG/M_MEM and VEX_M_REG/VEX_M_MEM have an empty opcodesub
##               while the other _REG/_MEM forms use '11 mr/reg' and
##               '!11 mr/reg' - confirm this is intended.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]              opcodesub ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],              '',           ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],              '11 mr/reg',  ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],              '!11 mr/reg', ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],       '',           ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],       '11 mr/reg',  ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],       '!11 mr/reg', ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],              '',           ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],              '11 mr/reg',  ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],              '!11 mr/reg', ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],       '',           ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],       '11 mr/reg',  ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],       '!11 mr/reg', ),
    'M':            ( 'ModR/M',     [ 'rm', ],                    '',           ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                    '',           ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                    '',           ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],                '',           ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],               '',           ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],              '',           ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],              '11 mr/reg',  ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],              '!11 mr/reg', ),
    'R':            ( 'ModR/M',     [ 'reg', ],                   '',           ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],              '',           ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],              '11 mr/reg',  ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],              '!11 mr/reg', ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],              '',           ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],              '11 mr/reg',  ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],              '!11 mr/reg', ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                    ''            ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                    ''            ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                    ''            ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                   ''            ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],      '',           ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],      '11 mr/reg',  ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],      '!11 mr/reg', ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],      '',           ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],      '11 mr/reg',  ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],      '!11 mr/reg', ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],       '',           ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],       '11 mr/reg',  ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],       '!11 mr/reg', ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],      '',           ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],      '11 mr/reg',  ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],      '!11 mr/reg', ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],             '',           ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],             '11 mr/reg',  ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],             '!11 mr/reg', ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],      '',           ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],      '11 mr/reg',  ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],      '!11 mr/reg', ),

    'FIXED':        ( 'fixed',      None,                         '',           ),
};
433
## \@oppfx values.
##
## Every prefix name is associated with its own (initially empty) list.
g_kdPrefixes = dict((sPrefix, []) for sPrefix in (
    'none',
    '0x66',
    '0xf3',
    '0xf2',
    '!0xf3',    # special case for bsf/tzcnt
));
442
## Special \@opcode tag values.
##
## Every value is associated with its own (initially empty) list.
g_kdSpecialOpcodes = dict((sSpecial, []) for sSpecial in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
452
## Special \@opcodesub tag values.
##
## Maps the tag value to a two entry list:
##    - 0: the real value for aliases (None for 'none'); non-alias entries
##         map to themselves.
##    - 1: the value for bs3cg1 ('' if none).
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    'vex.l=1':              [ 'vex.l=1',            'L1',       ], # Not an alias, must resolve to itself.
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',         ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',         ],
};
475
## Valid values for \@openc
##
## Maps the encoding name to a one entry list holding the BS3CG1ENC_XXX
## constant name, or None where the table has none ('prefix').
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
484
## \@opunused, \@opinvalid, \@opinvlstyle
##
## Maps the style name to an (initially empty) list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
494
## Known CPU names, each associated with an empty tuple.
g_kdCpuNames = dict.fromkeys(( '8086', '80186', '80286', '80386', '80486', ), ());
502
## \@opcpuid
##
## Maps the lower case CPUID feature name to the name of the corresponding
## X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':     'X86_CPUID_FEATURE_ECX_PCLMUL',   # CPUID.01h:ECX bit 1 (DTES64 is bit 2).
    'monitor':    'X86_CPUID_FEATURE_ECX_MONITOR',  # CPUID.01h:ECX bit 3 (CPL-DS is bit 4).
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_SMX',      # CPUID.01h:ECX bit 6 (TM2 is bit 8).
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':     'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':     'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':       'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
548
## \@ophints values.
##
## Maps the hint name to a DISOPTYPE_XXX constant name, or to an empty string
## for the hints that have no such constant in this table.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably - TODO confirm).
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
591
## \@opxcpttype values (see SDMv2 2.4, 2.7).
##
## Every exception type name is associated with its own (initially empty) list.
g_kdXcptTypes = dict((sXcptType, []) for sXcptType in (
    'none',
    '1',
    '2',
    '3',
    '4',
    '4UA',
    '5',
    '5LZ',      # LZ = VEX.L must be zero.
    '6',
    '7',
    '7LZ',
    '8',
    '11',
    '12',
    'E1',
    'E1NF',
    'E2',
    'E3',
    'E3NF',
    'E4',
    'E4NF',
    'E5',
    'E5NF',
    'E6',
    'E6NF',
    'E7NF',
    'E9',
    'E9NF',
    'E10',
    'E11',
    'E12',
    'E12NF',
));
627
628
629def _isValidOpcodeByte(sOpcode):
630 """
631 Checks if sOpcode is a valid lower case opcode byte.
632 Returns true/false.
633 """
634 if len(sOpcode) == 4:
635 if sOpcode[:2] == '0x':
636 if sOpcode[2] in '0123456789abcdef':
637 if sOpcode[3] in '0123456789abcdef':
638 return True;
639 return False;
640
641
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): 'vvvvv' in the comments below presumably refers to the
    ##               five bit map select field rather than VEX.vvvv - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [],     ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [],     ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [],     ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [],     ##< XOP prefix with vvvvv = 8
        'xop9':     [],     ##< XOP prefix with vvvvv = 9
        'xop10':    [],     ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte':     [  256, 1, ],   ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ],   ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':       [    8, 1, ],   ##< modrm.reg selects the instruction.
        'memreg /r':[   16, 1, ],   ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, 1, ],   ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, 1, ],   ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, 1, ],   ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, 1, ],   ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Initializes the map.

        sSelector must be a kdSelectors key and sEncoding a kdEncodings key.
        The asLeadOpcodes entries must be lower case hex bytes like '0x0f',
        and sDisParse, if given, must start with 'IDX_Parse'.  All of this is
        checked using assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.sIemName       = sIemName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = []                # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table with filtering instruction by sPrefix if not None.

        When filtering a 'byte+pfx' map, the copy uses the 'byte' selector.
        The instruction objects themselves are shared with the copy, only the
        list holding them is new.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                           else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries. This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of the map; the 'byte+pfx' selector also uses oInstr.sPrefix.
        Inconsistencies between the instruction and the selector are caught
        by assertions.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
            return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # modrm.mod is the top two bits, so mod == 0y11 means all of the 0xc0 mask is set.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions (those without an opcode are skipped).
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        the map name: 'm', 'r' and two digit lower case hex words keep their
        underscore, while other words get 'grp' and 'map' capitalized along
        with their first character and are appended directly.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing rather than indexing so an empty word doesn't raise IndexError.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
834
835
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter gives the default stance on zero extending (True) versus sign
    extending (False) a value.  Prefixing a value with '+' or '-' always
    selects sign extending, overriding fUnsigned=True.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32]; # Normal sizes.
        self.sName     = sName;
        self.acbSizes  = acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: each lower case hex digit mapped to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the least
        significant byte first in the bytearray.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fExplicitSign = sValue[0] in ('-', '+');
        fSignExtend   = True if fExplicitSign else not self.fUnsigned;
        offDigits     = 1 if fExplicitSign else 0;
        fHex          = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  A negative value n is rendered as the digit
        # wise inverse of (-n - 1), giving a non-negative two's complement form.
        sHex = hex(iValue if iValue >= 0 else -iValue - 1);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if iValue < 0:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex; # Make sure the top bit is set.

        # Find the natural size: the first normal size the digits fit in, or
        # failing that the digit count rounded up to a multiple of the largest.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad up to the natural size, using 'f' for negative values.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded:
            sHex = ('0' if iValue >= 0 else 'f') * cNeeded + sHex;

        # Convert to bytearray, walking the digit pairs from the end.
        abValue = bytearray(int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base type.
        """
        _ = sValue;
        return False;
960
961
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic whose constant starts with '!' requests
    the flag to be cleared, any other requests it to be set.
    """

    ## The mnemonics that make a value an AND+OR pair (see isAndOrPair).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns the TestType.get() result for the mask of flags to set when no
        flag is to be cleared, otherwise a pair (clear entry, set entry).

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair, i.e. names at least one of the
        kdZeroValueFlags mnemonics.

        The value is split on ',' exactly like get() does, so a mnemonic that
        merely contains one of the zero value names as a substring does not
        count.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
997
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a constant dictionary.

    Values are comma separated flag names.  Each name is upper cased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Returns the TestType.get() result for the flags in sValue ORed together.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant  = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlagValue;
        return TestType.get(self, '0x%x' % (fCombined,));
1018
1019
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    A modifier consists of the field it applies to (sField, a kdFields key),
    the assignment operator (sOp, one of kasOperators), the value as a string
    (sValue) and the name of the value type (sType).
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # NOTE(review): 'int' is created with the fUnsigned=True default and thus
        #               behaves like 'uint' - confirm whether fUnsigned=False was intended.
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', unlike the other xmmN.dw0 fields.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings.  Violations trip assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1285
1286
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (sVariable, a kdVariables key) against one
    of the values valid for that variable (sValue) using a compare operator
    (sOp, one of kasCompareOps).
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector, asserting that the variable, the operator and the
        value (for that variable) are all known.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1383
1384
1385class InstructionTest(object):
1386 """
1387 Instruction test.
1388 """
1389
1390 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1391 self.oInstr = oInstr # type: InstructionTest
1392 self.aoInputs = [] # type: List[TestInOut]
1393 self.aoOutputs = [] # type: List[TestInOut]
1394 self.aoSelectors = [] # type: List[TestSelector]
1395
1396 def toString(self, fRepr = False):
1397 """
1398 Converts it to string representation.
1399 """
1400 asWords = [];
1401 if self.aoSelectors:
1402 for oSelector in self.aoSelectors:
1403 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1404 asWords.append('/');
1405
1406 for oModifier in self.aoInputs:
1407 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1408
1409 asWords.append('->');
1410
1411 for oModifier in self.aoOutputs:
1412 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1413
1414 if fRepr:
1415 return '<' + ' '.join(asWords) + '>';
1416 return ' '.join(asWords);
1417
1418 def __str__(self):
1419 """ Provide string represenation. """
1420 return self.toString(False);
1421
1422 def __repr__(self):
1423 """ Provide unambigious string representation. """
1424 return self.toString(True);
1425
class Operand(object):
    """
    Instruction operand.

    Described by where it is found (sWhere, a g_kdOpLocations key) and its
    type (sType, a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType; ##< g_kdOpLocations key and g_kdOpTypes key respectively.

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. if the type starts with 'E', 'G' or 'M'. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1440
1441
1442
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds the core attributes of an instruction (mnemonic, operands, opcode,
    flags, tests, ...), implementation attributes, decoding bookkeeping and
    the intermediate (raw) input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks     = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items, enclosed
        in '<' and '>' when fRepr is True.  Fields with a None value are left
        out, and so are the optional fields that are empty or at their default.
        """
        asItems = [];

        def addField(sField, oValue):
            """ Adds the field unless the value is None. """
            if oValue is not None:
                asItems.append('%s=%s' % (sField, oValue,));

        addField('opcode', self.sOpcode);
        if self.sPrefix:
            addField('prefix', self.sPrefix);
        addField('mnemonic', self.sMnemonic);
        for iOperand, oOperand in enumerate(self.aoOperands):
            addField('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,));
        if self.aoMaps:
            addField('maps', ','.join([oMap.sName for oMap in self.aoMaps]));
        addField('encoding', self.sEncoding);
        if self.dHints:
            addField('hints', ','.join(self.dHints.keys()));
        addField('disenum', self.sDisEnum);
        if self.asCpuIds:
            addField('cpuid', ','.join(self.asCpuIds));
        addField('group', self.sGroup);
        if self.fUnused:
            addField('unused', 'True');
        if self.fInvalid:
            addField('invalid', 'True');
        addField('invlstyle', self.sInvalidStyle);
        addField('fltest', self.asFlTest);
        addField('flmodify', self.asFlModify);
        addField('flundef', self.asFlUndefined);
        addField('flset', self.asFlSet);
        addField('flclear', self.asFlClear);
        addField('mincpu', self.sMinCpu);
        addField('stats', self.sStats);
        addField('sFunction', self.sFunction);
        if self.fStub:
            addField('fStub', 'True');
        if self.fUdStub:
            addField('fUdStub', 'True');
        if self.cOpTags:
            addField('optags', str(self.cOpTags));
        if self.iLineFnIemOpMacro != -1:
            addField('FNIEMOP_XXX', str(self.iLineFnIemOpMacro));
        if self.iLineMnemonicMacro != -1:
            addField('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro));

        sRet = '; '.join(asItems);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as parent (oParent).  The oMap, sOpcode,
        sSubOpcode and sPrefix arguments replace the corresponding values of
        this instruction when given (non-empty).  The list and dictionary
        attributes are copied one level deep; aoMcBlocks is not carried over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent    = self;

        # Placement - can be overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix or self.sPrefix;
        oCopy.sOpcode    = sOpcode or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        # Attributes simply assigned, i.e. the values are shared with this instruction.
        for sAttr in ( 'sMnemonic', 'sBrief', 'sEncoding',
                       'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                       'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                       'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                       'sStats', 'sFunction', 'fStub', 'fUdStub',
                       'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                       'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes', ):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Containers getting their own (shallow) copy.  Deeper copy for aoOperands and aoTests?
        for sAttr in ( 'asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                       'asRawDisParams', 'asCopyTests', ):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms:
            - '0x..':  Hex value, returned as is.
            - '/r':    Returns r << 3.
            - '11/r':  Returns (r << 3) | 0xc0.
            - '!11/r': Returns (r << 3) | 0x80, i.e. with the two top bits
                       set to 10b.  @todo this doesn't really work...
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The single digit forms: a prefix followed by the /r digit.
        for sLead, bTopBits in ( ('/', 0x00), ('11/', 0xc0), ('!11/', 0x80), ):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1].isdigit():
                return (int(sOpcode[-1:]) << 3) | bTopBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        uRet = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style ('0' if None or empty).
        """
        if not asFlags:
            return '0';
        asConstants = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant);
        return ' | '.join(asConstants);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1718
1719
1720
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list as the type annotation indicates.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1732
## Instruction maps.
## Each entry gives the map name, optionally followed by a second positional
## argument (presumably the IEM table name, cf. g_dInstructionMapsByIemName),
## and the keyword arguments describing lead opcodes, selector and encoding.
g_aoInstructionMaps = [
    # One byte map and its groups.
    InstructionMap('one',        'g_apfnOneByteMap',       sSelector = 'byte'),
    InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    # Two byte map (0x0f) and its groups.
    InstructionMap('two0f',      'g_apfnTwoByteMap',    asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6',       'g_apfnGroup6',        asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m',     'g_apfnGroup7Mem',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r',                            asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8',                              asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9',       'g_apfnGroup9RegReg',  asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10',      None,                  asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12',      'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13',      'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14',      'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15',      'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16',                             asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17',                            asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP',                              asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # Three byte maps.
    InstructionMap('three0f38',  'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a',  'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX maps and groups.
    InstructionMap('vexmap1',    'g_apfnVexMap1',          sEncoding = 'vex1'),
    InstructionMap('vexgrp12',   'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13',   'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14',   'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15',   'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17',   'g_apfnVexGroup17_f3',    sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2',    'g_apfnVexMap2',          sEncoding = 'vex2'),
    InstructionMap('vexmap3',    'g_apfnVexMap3',          sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8',    sEncoding = 'xop8'),
    InstructionMap('xopmap9',    sEncoding = 'xop9'),
    InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10',   sEncoding = 'xop10'),
    InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (sName).
g_dInstructionMaps = { oCurMap.sName: oCurMap for oCurMap in g_aoInstructionMaps };
## The instruction maps indexed by sIemName; maps sharing an sIemName value overwrite each other, last one wins.
g_dInstructionMapsByIemName = { oCurMap.sIemName: oCurMap for oCurMap in g_aoInstructionMaps };
1798
1799
1800#
1801# Decoder functions.
1802#
1803
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function is defined and the raw source lines it
    is made up of.  Mainly used for scoped searches for variables used in
    microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once (asserts that iEndLine is still -1).
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1826
1827
1828#
1829# "Microcode" statements and blocks
1830#
1831
class McStmt(object):
    """
    Statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (list of strings).
        self.asParams = asParams;
        ## User specific data, initially None.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement.

        Returns 'NAME(param1, param2, ...);' indented by cchIndent spaces and
        terminated by a newline.
        """
        sIndent = ' ' * cchIndent;
        sParams = ', '.join(self.asParams);
        return sIndent + self.sName + '(' + sParams + ');\n';

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, returning the concatenated code.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having any of the given names,
        descending into the branches of conditional statements (if-branch
        before else-branch).  Returns None if nothing matches.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            oFound = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                      or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
            if oFound:
                return oFound;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches the given list of statements (including conditional branches)
        for the names in the dNames dictionary, adding each one found to dRet
        with an occurrence count.

        Returns the total number of hits.
        """
        cTotal = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cTotal += 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cTotal += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cTotal;

    def isCppStmt(self):
        """ Checks if this is a C++ statement (name starts with 'C++'). """
        return self.sName.startswith('C++');
1894
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        ## The statements of the IF-branch (always a private list copy).
        self.aoIfBranch            = list(aoIfBranch) if aoIfBranch is not None else [];
        ## The statements of the ELSE-branch (always a private list copy), empty if none.
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        ## User specific IF-branch annotation.
        self.oIfBranchAnnotation   = None;
        ## User specific ELSE-branch annotation.
        self.oElseBranchAnnotation = None;
        ## Infix for the ELSE/ENDIF macro names: '_NATIVE' for IEM_MC_NATIVE_IF, otherwise empty.
        self.sNativeInfix          = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches; the else part is
        only emitted when the else-branch isn't empty.
        """
        sIndent = ' ' * cchIndent;
        asRet = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asRet);
1915
class McStmtNativeIf(McStmtCond):
    """ IEM_MC_NATIVE_IF """
    def __init__(self, sName, asArchitectures):
        # The single parameter is the architectures ored together, or '0' if there are none.
        sCondition = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sCondition,]);
        ## The architecture list as given by the caller.
        self.asArchitectures = asArchitectures;
1921
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type.
        self.sType    = sType;
        ## The variable name.
        self.sVarName = sVarName;
        ## The assigned / const value, None if none.
        self.sValue   = sValue;
1929
class McStmtArg(McStmtVar):
    """ IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1938
class McStmtCall(McStmt):
    """ IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function name.
        self.idxFn     = iFnParam;
        ## Index into asParams of the entry following the function name.
        self.idxParams = iFnParam + 1;
        ## The function name.
        self.sFn       = asParams[iFnParam];
        ## The asParams entry at iRcNameParam, None if iRcNameParam is negative.
        self.iRcName   = asParams[iRcNameParam] if iRcNameParam >= 0 else None;
1947
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The two parameters are the tested and the modified flags of the given
    instruction in C style.
    """
    def __init__(self, oInstruction):
        sTested   = oInstruction.getTestedFlagsCStyle();
        sModified = oInstruction.getModifiedFlagsCStyle();
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', [sTested, sModified,]);
1955
1956
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Extra indentation added to the one given to renderCode.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with an annotation comment appended to each line
        (every newline in the code gets the annotation inserted before it).
        """
        sAnnotated = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotated);
1974
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """ Renders 'fn(arg1, arg2, ...);' followed by the decode/normal annotation. """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sIndent     = ' ' * (cchIndent + self.cchIndent);
        sArgs       = ', '.join(self.asParams[1:]);
        return sIndent + self.asParams[0] + '(' + sArgs + ');' + sAnnotation;
1994
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Extra indentation added to the one given to renderCode.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines; the else part
        is only emitted when the else-branch isn't empty.
        """
        cchOuter    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + 'else ' + sAnnotation + '\n');
            asRet.append(sIndent + '{\n');
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchOuter + 4));
            asRet.append(sIndent + '}\n');
        return ''.join(asRet);
2017
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is, i.e. without any indentation or annotation. """
        sDirective = self.asParams[0];
        return sDirective + '\n';
2027
2028
## IEM_MC_F_XXX values.
## Each flag maps to a tuple of additional flags which are set together with it.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086':           (),
    'IEM_MC_F_MIN_186':             (),
    'IEM_MC_F_MIN_286':             (),
    'IEM_MC_F_NOT_286_OR_OLDER':    (),
    'IEM_MC_F_MIN_386':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM':         ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II':      ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE':            ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT':               ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT':           (),
};
## IEM_CIMPL_F_XXX values.
## Each flag maps to a tuple of additional flags which are set together with it.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT':                (),
    'IEM_CIMPL_F_BRANCH_INDIRECT':              (),
    'IEM_CIMPL_F_BRANCH_RELATIVE':              (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL':           (),
    'IEM_CIMPL_F_BRANCH_FAR':                   (),
    'IEM_CIMPL_F_BRANCH_ANY':                   ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                                                 'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK':                 (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR':             (),
    'IEM_CIMPL_F_MODE':                         (),
    'IEM_CIMPL_F_RFLAGS':                       (),
    'IEM_CIMPL_F_INHIBIT_SHADOW':               (),
    'IEM_CIMPL_F_STATUS_FLAGS':                 (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER':              (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE':             (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER':   ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT':                       (),
    'IEM_CIMPL_F_FPU':                          (),
    'IEM_CIMPL_F_REP':                          (),
    'IEM_CIMPL_F_IO':                           (),
    'IEM_CIMPL_F_END_TB':                       (),
    'IEM_CIMPL_F_XCPT':                         ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                                                 'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE':     (),
};
2072class McBlock(object):
2073 """
2074 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2075 """
2076
## @name Macro expansion types.
## @{
## No macro expansion.
kiMacroExp_None    = 0;
## Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
kiMacroExp_Entire  = 1;
## Partial/mixed (cmpxchg16b), safe to assume single block.
kiMacroExp_Partial = 2;
## @}
2083
def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
             oInstruction = None, cchIndent = None, fDeferToCImpl = False):
    #
    # Where the block is located.
    #
    ## The source file containing the block.
    self.sSrcFile      = sSrcFile;
    ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
    self.iBeginLine    = iBeginLine;
    ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
    self.offBeginLine  = offBeginLine;
    ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
    self.iEndLine      = -1;
    ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
    self.offEndLine    = 0;
    ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
    self.offAfterEnd   = 0;
    ## Indentation level of the block.
    ## Note! A cchIndent of zero is treated like None, i.e. offBeginLine is used instead.
    self.cchIndent     = cchIndent or offBeginLine;

    #
    # What the block belongs to.
    #
    ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
    self.fDeferToCImpl = fDeferToCImpl;
    ## The function the block resides in.
    self.oFunction     = oFunction;
    ## The name of the function the block resides in.  DEPRECATED.
    self.sFunction     = oFunction.sName;
    ## The block number within the function.
    self.iInFunction   = iInFunction;
    ## The instruction this block is associated with - can be None.
    self.oInstruction  = oInstruction # type: Instruction

    #
    # The block content and what has been decoded from it so far.
    #
    ## The raw lines the block is made up of.
    self.asLines       = [] # type: List[str]
    ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
    ## kiMacroExp_Entire, kiMacroExp_Partial).
    self.iMacroExp     = self.kiMacroExp_None;
    ## IEM_MC_BEGIN: Argument count.
    self.cArgs         = -1;
    ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoArgs        = [] # type: List[McStmtArg]
    ## IEM_MC_BEGIN: Locals count.
    self.cLocals       = -1;
    ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoLocals      = [] # type: List[McStmtVar]
    ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
    self.dsMcFlags     = {} # type: Dict[str, bool]
    ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
    self.dsCImplFlags  = {} # type: Dict[str, bool]
    ## Decoded statements in the block.
    self.aoStmts       = [] # type: List[McStmt]
2129
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording the end position and the
    raw lines.

    May only be called once (asserts that iEndLine is still -1).
    """
    assert self.iEndLine == -1;
    (self.iEndLine, self.offEndLine, self.offAfterEnd) = (iEndLine, offEndLine, offAfterEnd);
    self.asLines = asLines;
2139
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException).

    The reported line is iBeginLine plus the number of newlines preceding
    off in sRawCode, the reported column is the one-based position of off
    within that line.
    """
    cLinesBefore  = sRawCode.count('\n', 0, off);
    offPrevEol    = sRawCode.rfind('\n', 0, off);   # -1 if off is on the first line.
    iColumn       = off - offPrevEol;
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cLinesBefore, iColumn, sMessage,));
2146
def raiseStmtError(self, sName, sMessage):
    """ Raises a statement parser error (ParserException) referring to the first line of the block. """
    sError = '%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,);
    raise ParserException(sError);
2150
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks the parameter count of the sName statement.

    Returns True if it matches, raises ParserException if it doesn't.
    """
    cParams = len(asParams);
    if cParams == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParams,));
2157
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """ Generic parser that wraps the statement in a plain McStmt object; the block (oSelf) isn't used. """
    _ = oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2163
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """ Generic parser that wraps the statement in a plain McStmtCond object; the block (oSelf) isn't used. """
    _ = oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2169
## The architecture values accepted by IEM_MC_NATIVE_IF (only the keys are looked up).
kdArchVals = dict.fromkeys((
    'RT_ARCH_VAL_X86',
    'RT_ARCH_VAL_AMD64',
    'RT_ARCH_VAL_ARM32',
    'RT_ARCH_VAL_ARM64',
    'RT_ARCH_VAL_SPARC32',
    'RT_ARCH_VAL_SPARC64',
), True);
2178
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a list of
    RT_ARCH_VAL_XXX values separated by '|'.  Unknown architectures result
    in a statement error.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    asArchitectures = [];
    if asParams[0].strip() != '0':
        for sArch in asParams[0].split('|'):
            sArch = sArch.strip();
            if sArch not in oSelf.kdArchVals:
                oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
            asArchitectures.append(sArch);
    return McStmtNativeIf(sName, asArchitectures);
2191
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The four parameters are: argument count, locals count, IEM_MC_F_XXX
    flags and IEM_CIMPL_F_XXX flags ('0' meaning no flags).  The counts and
    flags are recorded in the block, the statement itself is returned as a
    plain McStmt.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    if oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;
    oSelf.cArgs   = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    # The IEM_MC_F_XXX flags, including the ones each flag implies.
    if sMcFlags != '0':
        for sFlag in sMcFlags.split('|'):
            sFlag = sFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            for sImpliedFlag in g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sImpliedFlag] = True;

    # The IEM_CIMPL_F_XXX flags.
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2214
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """ IEM_MC_ARG - parameters: type, name, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2222
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2230
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2238
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

    Returns a tuple with two statements, see the note below.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    # Note! We split this one up into an IEM_MC_LOCAL and an IEM_MC_ARG_LOCAL_REF statement.
    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2250
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """ IEM_MC_IMPLICIT_AVX_AIMPL_ARGS - takes no parameters. """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    # Note! Translated to an IEM_MC_ARG_CONST statement for argument #0.
    sType  = 'PX86XSAVEAREA';
    sVar   = 'pXState';
    sValue = '&pVCpu->cpum.GstCtx.XState';
    oStmt  = McStmtArg('IEM_MC_ARG_CONST', [sType, sVar, sValue, '0'], sType, sVar, 0, sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2260
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """ IEM_MC_LOCAL - parameters: type, name. """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2268
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_ASSIGN - parameters: type, name, assigned value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2276
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2284
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_EFLAGS - single parameter: the name of the (uint32_t) local. """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2292
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The last character of the name gives the argument count.  Parameter #0
    is the return value variable, #1 the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 2);
    return McStmtCall(sName, asParams, 1, 0);
2299
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The last character of the name gives the argument count.  Parameter #0
    is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2306
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The last character of the name gives the argument count.  Parameter #0
    is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2313
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The last character of the name gives the argument count.  Parameter #0
    is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2320
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The last character of the name gives the argument count.  Parameter #0
    is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2327
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The last character of the name gives the argument count.  Parameter #0
    is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, 0);
2334
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcCallCImpl and parseMcDeferToCImpl to validate and
    merge a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    The sFlags string is a '|' separated list of flags, optionally with
    comments and parentheses; '0' entries are skipped.  Each flag also sets
    the flags it is associated with in g_kdCImplFlags.

    Returns None.  Raises a statement error (ParserException) for unknown
    flags, which includes empty flag entries (e.g. caused by a stray '|').
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Strip one level of parentheses.  startswith/endswith are used rather than
            # indexing so an empty entry ends up as an 'Unknown flag' statement error
            # below instead of an IndexError here.
            if sFlag.startswith('('):
                sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):
                sFlag = sFlag[:-1].strip();
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2356
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The last character of the name gives the argument count.  Parameter #0
    holds the IEM_CIMPL_F_XXX flags and #2 the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    oSelf.parseCImplFlags(sName, asParams[0]);
    return McStmtCall(sName, asParams, 2);
2364
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The argument count is the digit preceding the '_RET' suffix of the
    name.  Parameter #0 holds the IEM_CIMPL_F_XXX flags and #2 the function.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    cCallArgs = int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    oSelf.parseCImplFlags(sName, asParams[0]);
    return McStmtCall(sName, asParams, 2);
2374
2375 @staticmethod
2376 def stripComments(sCode):
2377 """ Returns sCode with comments removed. """
2378 off = 0;
2379 while off < len(sCode):
2380 off = sCode.find('/', off);
2381 if off < 0 or off + 1 >= len(sCode):
2382 break;
2383
2384 if sCode[off + 1] == '/':
2385 # C++ comment.
2386 offEnd = sCode.find('\n', off + 2);
2387 if offEnd < 0:
2388 return sCode[:off].rstrip();
2389 sCode = sCode[ : off] + sCode[offEnd : ];
2390 off += 1;
2391
2392 elif sCode[off + 1] == '*':
2393 # C comment
2394 offEnd = sCode.find('*/', off + 2);
2395 if offEnd < 0:
2396 return sCode[:off].rstrip();
2397 sSep = ' ';
2398 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2399 sSep = '';
2400 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2401 off += len(sSep);
2402
2403 else:
2404 # Not a comment.
2405 off += 1;
2406 return sCode;
2407
2408 @staticmethod
2409 def extractParam(sCode, offParam):
2410 """
2411 Extracts the parameter value at offParam in sCode.
2412 Returns stripped value and the end offset of the terminating ',' or ')'.
2413 """
2414 # Extract it.
2415 cNesting = 0;
2416 offStart = offParam;
2417 while offParam < len(sCode):
2418 ch = sCode[offParam];
2419 if ch == '(':
2420 cNesting += 1;
2421 elif ch == ')':
2422 if cNesting == 0:
2423 break;
2424 cNesting -= 1;
2425 elif ch == ',' and cNesting == 0:
2426 break;
2427 offParam += 1;
2428 return (sCode[offStart : offParam].strip(), offParam);
2429
@staticmethod
def extractParams(sCode, offOpenParen):
    """
    Parses a parameter list.

    The opening parenthesis must be at offOpenParen in sCode (asserted).

    Returns the list of parameter values and the offset of the closing parentheses.
    Returns (None, len(sCode)) if no closing parentheses was found.
    """
    assert sCode[offOpenParen] == '(';
    asParams = [];
    off = offOpenParen + 1;
    while off < len(sCode):
        ch = sCode[off];
        if ch.isspace():
            off += 1;
        elif ch != ')':
            (sParam, off) = McBlock.extractParam(sCode, off);
            if off >= len(sCode):
                # Ran off the end of the code without finding the ',' or ')' terminating the
                # parameter, so the list is unterminated.  Report it the documented way so the
                # caller can produce a proper parsing error (rather than asserting here).
                break;
            asParams.append(sParam);
            if sCode[off] == ',':
                off += 1;
        else:
            return (asParams, off);
    return (None, off);
2453
2454 @staticmethod
2455 def findClosingBraces(sCode, off, offStop):
2456 """
2457 Finds the matching '}' for the '{' at off in sCode.
2458 Returns offset of the matching '}' on success, otherwise -1.
2459
2460 Note! Does not take comments into account.
2461 """
2462 cDepth = 1;
2463 off += 1;
2464 while off < offStop:
2465 offClose = sCode.find('}', off, offStop);
2466 if offClose < 0:
2467 break;
2468 cDepth += sCode.count('{', off, offClose);
2469 cDepth -= 1;
2470 if cDepth == 0:
2471 return offClose;
2472 off = offClose + 1;
2473 return -1;
2474
2475 @staticmethod
2476 def countSpacesAt(sCode, off, offStop):
2477 """ Returns the number of space characters at off in sCode. """
2478 offStart = off;
2479 while off < offStop and sCode[off].isspace():
2480 off += 1;
2481 return off - offStart;
2482
@staticmethod
def skipSpacesAt(sCode, off, offStop):
    """ Returns first offset at or after off for a non-space character (offStop at most when off is below it). """
    cchSpaces = McBlock.countSpacesAt(sCode, off, offStop);
    return off + cchSpaces;
2487
2488 @staticmethod
2489 def isSubstrAt(sStr, off, sSubStr):
2490 """ Returns true of sSubStr is found at off in sStr. """
2491 return sStr[off : off + len(sSubStr)] == sSubStr;
2492
## Matches the start of C/C++ control statements: 'if (', 'else', 'while (', 'for (' and 'do'.
koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches 'iem.s.<member>' for a fixed set of member names, capturing the member name.
koReIemDecoderVars = re.compile(r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                r')');

## The conditional statement kinds.  Entry #0 of each is the prefix identifying
## the IF statement, followed by the names of the matching ELSE and ENDIF statements.
kaasConditions = (
    ( 'IEM_MC_IF_',       'IEM_MC_ELSE',        'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop].

    The text is scanned from left to right.  Whitespace and C/C++ comments
    are skipped.  Anything starting with 'IEM_MC_' is looked up by name in
    g_dMcStmtParsers and handed to the parser registered there.  Everything
    else is treated as C/C++ code: 'if' statements become McCppCond, lines
    starting with '#' become McCppPreProc and the rest McCppGeneric.  The
    branches of conditionals (MC and C++ alike) are decoded by recursive
    calls using iLevel + 1.

    Parameters:
        sRawCode    The code to decode.
        off         Where to start decoding.
        offStop     Where to stop (exclusive).  Negative means the end of
                    sRawCode.
        iLevel      The current conditional nesting depth.  MC conditionals
                    are rejected when this is larger than 1.

    Returns list of McStmt instances.
    Raises ParserException on failure.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            # NOTE(review): this indexing raises IndexError if the '/' is the very
            #               last character of sRawCode - confirm that cannot happen.
            ch = sRawCode[off + 1];
            if ch == '/': # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break; # Unterminated comment: stop and return what we have so far.
                off += 1;
            elif ch == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break; # Unterminated comment: stop and return what we have so far.
                off += 2;
            else:
                # The code relies on raiseDecodeError not returning (off is not advanced here).
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements end with a semicolon, except for conditionals which end with a '{'.
            # Extract it and strip comments from it.
            # iCond is the index into kaasConditions, or -1 if this is not the start of a conditional.
            if   self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else:                                                           iCond = -1;
            if iCond < 0:
                # Note: The search is not limited by offStop.  For one-line C++ if/else
                #       bodies the recursive call gets the offset of the terminating ';'
                #       as offStop, so the ';' sits exactly at offStop.
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                # A ';' before the '{' means the '{' we found belongs to something else.
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # Step back over the character preceding the '{' and then over any
                # further whitespace, so that offEnd ends up after the closing ')'.
                # NOTE(review): this looks like it assumes at least one whitespace
                #               character between ')' and '{' - confirm.
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            # The closing parenthesis must be the very last character of the statement.
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler.
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0]; # The table entries are tuples with the parser function first.
            oStmt = fnParser(self, sName, asParams);
            # A parser may return a single statement or a list/tuple of statements.
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            #   IEM_MC_IF_XXX() {
            #       IEM_MC_WHATEVER();
            #   } IEM_MC_ELSE() {
            #       IEM_MC_WHATEVER();
            #   } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                # Decode what is between the braces.
                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    # The else macro takes no arguments, so expect an empty '()' pair.
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement.  'if' and 'else' require special handling.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    # Keyword followed by a parenthesized expression: extract the expression.
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                # Preprocessor statement: runs to the end of the line (or to offStop).
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;
            else:
                # Note: Not limited by offStop, see the MC statement case above.
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding.  This is a statement filter
            # criteria when generating the threaded functions blocks.
            # (Only the first two searches extend into the following statement.)
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                      );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1; # Only used for error reporting below; reassigned after the if-branch.

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1;
                else:
                    # One-line body: offBlock1End is the offset of the terminating ';',
                    # which also becomes offStop of the recursive call below.
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1;
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2746
def decode(self):
    """
    Returns the decoded statement list of the block.

    The list is cached in self.aoStmts.  As long as that is empty, the lines
    of the block (self.asLines) are joined together and decoded again.

    Raises ParserException on failure.
    """
    if self.aoStmts:
        return self.aoStmts;
    self.aoStmts = self.decodeCode(''.join(self.asLines));
    return self.aoStmts;
2756
2757
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks if iEffSeg is used before the effective address has been decoded.
    Returns None on success, error string on failure.

    Only the first IEM_MC_CALC_RM_EFF_ADDR statement in aoStmts is
    considered, and the statements preceding it are examined starting with
    the closest one.  Conditional branches are not entered, as we're
    ASSUMING these will not occur before the address calculation.

    See r158454 for an example of this issue.
    """
    # Locate the (first) effective address calculation.
    idxCalc = next((idxCur for idxCur, oCur in enumerate(aoStmts) if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), None);
    if idxCalc is None:
        return None;

    # Walk backwards looking for IEMCPU::iEffSeg references in the parameters.
    for idxPrev in range(idxCalc - 1, -1, -1):
        if any('pVCpu->iem.s.iEffSeg' in sParam for sParam in aoStmts[idxPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2779
## Matches the first word of a C/C++ statement (group 1); the word must be
## followed by a space, an opening parenthesis or a semicolon.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

## First words that checkForDoneDecoding() accepts in decoding statements
## following the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(('IEMOP_HLP_IN_VMX_OPERATION', 'IEMOP_HLP_VMX_INSTR',), True);
2785
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    macro invocation and that the statements are correctly placed around it:
    no state modifying MC statement before it, no decoding statement after it.
    Returns None on success, error string on failure.

    An IEM_MC_DEFER_TO_CIMPL_XXX statement at the very start of the block
    counts as an implicit invocation.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top level statements are examined.
    cDoneMarkers = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        uStmtNo = idxStmt + 1;

        # C/C++ statements are classified by their first word.
        if oCur.isCppStmt():
            oFirstWord = self.koReCppFirstWord.match(oCur.asParams[0]);
            if not oFirstWord:
                continue;
            sWord = oFirstWord.group(1);
            if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_',)):
                cDoneMarkers += 1;
            elif cDoneMarkers > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
                return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (uStmtNo,);
            continue;

        # MC statements are classified by name.
        sStmtName = oCur.sName;
        if idxStmt == 0 and sStmtName.startswith('IEM_MC_DEFER_TO_CIMPL_'):
            cDoneMarkers += 1; # implicit
        elif cDoneMarkers == 0 and g_dMcStmtParsers.get(sStmtName, (None, False))[1]:
            return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (uStmtNo,);
        elif cDoneMarkers > 0 and sStmtName == 'IEM_MC_CALC_RM_EFF_ADDR':
            return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (uStmtNo,);

    if cDoneMarkers == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneMarkers > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2827
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.
    Returns None on success, error string on failure.

    Parameters:
        aoStmts             The statements to check.  The branches of
                            conditional statements are checked recursively.
        asRegRefClasses     Dictionary keyed by the register classes (GREG,
                            EFLAGS, FPUREG, ...) for which a reference has
                            been seen so far.  It is updated in place and
                            shared with the recursive calls; pass an empty
                            dictionary when starting on a block.

    The register class is taken from the statement name: the word following
    '_REF_' and '_FETCH_' respectively.  Memory fetches (_FETCH_MEM...) are
    not register accesses and are ignored.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                # The FPU conditionals have the _REF_ part at the end of the
                # name rather than in front of the register class.
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # Nothing follows the class (e.g. IEM_MC_REF_EFLAGS), so the
                        # class is the whole remainder of the name.  (Taking just one
                        # character here would never match what the fetch side below
                        # extracts.)
                        sClass = oStmt.sName[offRef + 5 : ];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = oStmt.sName[offFetch + 7 : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.  Both branches share the dictionary, so a reference
        # made in the if-branch is also held against fetches in the else-branch.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2883
def check(self):
    """
    Decodes the block and runs the sanity checks on the statements.

    Each check returns either None or an error string.  The checks are always
    all executed, in a fixed order.

    Returns error string list, empty if all is fine.
    """
    aoStmts = self.decode();
    asResults = [
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    ];
    return [sError for sError in asResults if sError];
2905
2906
## Temporary flag for enabling / disabling experimental MCs depending on the
## SIMD register allocator.
## It is used in the g_dMcStmtParsers table below as the 'native recompiler
## support' column value for some of the SIMD related statements (for instance
## IEM_MC_COPY_XREG_U128 and IEM_MC_FETCH_XREG_U32).
g_fNativeSimd = True;
2910
2911## IEM_MC_XXX -> parser + info dictionary.
2912#
2913# The info columns:
#   - col 1+0: boolean entry indicating whether the statement modifies state and
#              must not be used before IEMOP_HLP_DONE_*.
2916# - col 1+1: boolean entry indicating similar to the previous column but is
2917# used to decide when to emit calls for conditional jumps (Jmp/NoJmp).
2918# The difference is that most IEM_MC_IF_XXX entries are False here.
2919# - col 1+2: boolean entry indicating native recompiler support.
2920#
2921# The raw table was generated via the following command
2922# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2923# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2924# pylint: disable=line-too-long
2925g_dMcStmtParsers = {
2926 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2927 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2928 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2929 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2930 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2931 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2932 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2933 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2934 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2935 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2936 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2937 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2938 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2939 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2940 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2941 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2942 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2943 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2944 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2945 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2946 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2947 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2948 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2949 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2950 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2951 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2952 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2953 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2954 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2955 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2956 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2957 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2958 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2959 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2960 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2961 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2962 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2963 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2964 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2965 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2967 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2968 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2969 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2970 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2971 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2972 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2973 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2974 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2975 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2976 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2977 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2978 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2979 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2980 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2981 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2982 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2983 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2984 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2985 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2986 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2987 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2988 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2989 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2990 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2991 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2992 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2993 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2994 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2995 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2996 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2997 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
2998 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2999 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, False, ),
3000 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3001 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
3002 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3003 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3004 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3005 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3006 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3007 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3008 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3009 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3010 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3011 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3012 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3013 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3014 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3015 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3016 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3017 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3018 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3019 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3020 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3021 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3022 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3023 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3024 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3025 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3026 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3027 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3028 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3029 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3030 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3031 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3032 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3033 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3034 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3035 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3036 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3037 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3038 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3039 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3045 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3046 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3047 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3048 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3049 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3050 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3051 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3052 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3053 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3054 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3055 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3056 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3057 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3058 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3059 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3060 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3061 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3062 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3063 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3064 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3065 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3066 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3067 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3068 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3069 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3070 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3071 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3072 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3073 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3074 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3075 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3076 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3077 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3079 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3080 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3081 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3082 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3083 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3084 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3085 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3086 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3087 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3088 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3089 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3090 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3091 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3092 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3093 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, False, ),
3094 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3095 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3096 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3097 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3098 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3099 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3100 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, False, ),
3101 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3102 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3103 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3104 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3106 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3107 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3108 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3109 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3110 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3111 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3112 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3113 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3114 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3115 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3116 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3117 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3118 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3119 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3120 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3121 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3122 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3123 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3124 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3125 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3126 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3127 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3128 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3129 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3130 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3131 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3132 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3133 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3134 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3135 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3136 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3137 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3138 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3139 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3140 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, False, ),
3141 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, False, ),
3142 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3143 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3144 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3145 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3146 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3147 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3148 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3149 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3150 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3151 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3152 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3153 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3154 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3155 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3156 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3157 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3158 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3159 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3160 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3161 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3162 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3163 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3164 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3165 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3166 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3167 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3168 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3169 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3170 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3171 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3172 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3173 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3174 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3175 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3176 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3177 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3178 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3179 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3181 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3183 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3184 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3188 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3196 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3197 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3198 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3199 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3200 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3201 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3202 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3203 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3204 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3205 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3206 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3214 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3215 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3216 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3217 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3218 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3219 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3220 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3221 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3222 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3223 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3224 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3225 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3226 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3227 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3228 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3229 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3230 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3231 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3232 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3233 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3234 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3235 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3236 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3237 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, False, ),
3238 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3239 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3240 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3241 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3242 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3243 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3244 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3245 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3246 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3247 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3248 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3249 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3250 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3251 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3252 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3253 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3254 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3255 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3256 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3257 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3258 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3259 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3260 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3261 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3262 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3263 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3264 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3265 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3266 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3267 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3268 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3269 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3270 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3271 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3272 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3273 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3274 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3275 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3276 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3277 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3278 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3279 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3280 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3281 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3282 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3283 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3284 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3285 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3286 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3287 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3288 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3289 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3290 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3291 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3292 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3293 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3294 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3295 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3296 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3297 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3298 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3299 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3300 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3301 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3302 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3303 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3304 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3311 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3312 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3313 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3314 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3315 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3316 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3317 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3318 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3319 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3320 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3321 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3322 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3323 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3324 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3325 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3326 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3327 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3328 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3331 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3332 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3334 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3335 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3336 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3337 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3338 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3339 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3340 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3341 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3343 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3344 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3345 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3346 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3347 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3348 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3349 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3350 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3351 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3352 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3353 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3354 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3355 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3356 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3357 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3358 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3359};
3360# pylint: enable=line-too-long
3361
## List of microcode blocks.
#  Module-level list that starts out empty; per the type comment below it is
#  meant to hold McBlock instances.
g_aoMcBlocks = [] # type: List[McBlock]
3364
3365
3366
class ParserException(Exception):
    """
    Exception type used for parser errors.

    Carries a single, already formatted message string.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3371
3372
3373class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3374 """
3375 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3376 """
3377
3378 ## @name Parser state.
3379 ## @{
3380 kiCode = 0;
3381 kiCommentMulti = 1;
3382 ## @}
3383
class Macro(object):
    """
    A macro definition: name, optional parameter list and body text.

    A macro created with a non-empty parameter list gets a precompiled regular
    expression that locates parameter names in the body (optionally flanked by
    the '##' token pasting operator); expandMacro uses it for substitution.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded body text.
        self.iLine  = iLine;    ##< Line number supplied by the creator (presumably where the macro was defined).
        ## Matches a parameter name including any adjacent '##' operator.  None when there are no parameters.
        self.oReArgMatch = None;
        if asArgs:
            sAlternatives    = '|'.join(asArgs);
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + sAlternatives + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """
        Cosmetic helper for expandMacro: True when ch is a whitespace character.

        NOTE(review): the '(' exclusion can never trigger for a whitespace
        character; kept so the result is identical to the previous code.
        """
        if ch == '(':
            return False;
        return ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Returns the macro body with every parameter occurrence replaced by the
        corresponding entry in asArgs.

        oParent is unused.  For a simple macro (no parameters) asArgs must be
        empty/None and the body is returned unmodified.
        """
        _ = oParent;

        # Simple macro: nothing to substitute.
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dValues = dict((self.asArgs[idxArg], sArg) for idxArg, sArg in enumerate(asArgs));

        sExpanded = self.sBody;
        offSearch = 0;
        while True:
            oHit = self.oReArgMatch.search(sExpanded, offSearch);
            if not oHit:
                break;
            offStart = oHit.start();
            offEnd   = oHit.end();
            sValue   = dValues[oHit.group(2)];

            # Optional padding; only considered on a side without a '##' operator.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sExpanded[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);
            sReplacement = (' ' if fPadBefore else '') + sValue + (' ' if fPadAfter else '');

            sExpanded = sExpanded[:offStart] + sReplacement + sExpanded[offEnd:];

            # Resume after the inserted value so the replacement text is not rescanned.
            offSearch = offStart + len(sValue);

        return sExpanded;
3425
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    The constructor validates the expression (for 'if' and 'elif') or the
    define name (for any other type) against what this class understands.  The
    isArchIncluded* / isInBlockForArch methods then answer whether something
    guarded by the conditional(s) applies to a given build architecture.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the build architecture name (sArch) to the matching RT_ARCH_XXX define.
    kdBuildArchToIprt = {
        'x86':      'RT_ARCH_X86',
        'amd64':    'RT_ARCH_AMD64',
        'arm32':    'RT_ARCH_ARM32',
        'arm64':    'RT_ARCH_ARM64',
        'sparc32':  'RT_ARCH_SPARC',
        'sparc64':  'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive type; 'if' and 'elif' get sExpr checked as an
        expression, anything else gets sExpr checked as a single define name.
        Raises Exception if the expression/define is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Only ever tested for truthiness in this class.
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr);

    @staticmethod
    def _skipSpaces(sExpr, off):
        """ Returns the offset of the first non-whitespace character at or after off. """
        while off < len(sExpr) and sExpr[off].isspace():
            off += 1;
        return off;

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """
        Checks that sDefine is one that we support: either a key in
        kdKnownDefines or something shaped like a VMM_INCLUDED_xxx_h guard.

        Returns True.  Raises Exception if unsupported.
        """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in cls.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported.

        Accepts '0', '1', and defined(xxx) terms (optionally preceded by '!' and
        grouped with parentheses) joined by '||' / '&&', optionally ending with
        a literal '1'.

        Returns True.  Raises Exception if not supported or if the parentheses
        are unbalanced.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
                continue;

            # defined(xxxx) or a trailing literal '1'.
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if oMatch:
                cls.checkSupportedDefine(oMatch.group(1));
                off = oMatch.end();
            elif sExpr[off:] == '1':
                off = len(sExpr);   # No match object here, so just consume the literal.
            else:
                raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Look for closing parentheses.
            off = cls._skipSpaces(sExpr, off);
            while off < len(sExpr) and sExpr[off] == ')':
                if cParan <= 0:
                    raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                cParan -= 1;
                off     = cls._skipSpaces(sExpr, off + 1);

            # Look for binary operator.
            if off >= len(sExpr):
                break;
            if sExpr[off:off + 2] not in ('||', '&&'):
                raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

            # Skip the operator and any spaces following it.
            off = cls._skipSpaces(sExpr, off + 2);

        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined() term evaluates to true for sArch
        (or a trailing '1' is reached), False if none does.  Raises Exception on
        anything else than what is described below.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            off = cls._skipSpaces(sExpr, off);
            if off >= len(sExpr):
                break;
            if not sExpr.startswith('||', off):     # Must test at the current offset, not the string start.
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            off = cls._skipSpaces(sExpr, off + 2);

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch is not a key of kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not listed in kdKnownDefines are treated as undefined.  Raises
        Exception for defines marked -2 (unsupported for MC block filtering).
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        aoCppCondStack is the stack of conditionals currently open; every entry
        must put us in a branch that applies to sArch.  iLine is unused (debug
        aid only).
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block matches, so we must not have moved on to an #elif or #else.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # A matching #elif only counts if it is the most recent one and we are not in #else.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3616
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile and asLines are stored as the file name and its lines.
    sDefaultMap must be a key in g_dInstructionMaps; the corresponding map
    becomes the default map.  sHostArch is stored for later use.  When
    oInheritMacrosFrom is given, its macro dictionary is copied and its macro
    matching expression is reused.
    """
    # Input.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.sHostArch      = sHostArch;

    # Parsing position and state.
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.aoCppCondStack = []    # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.

    # Macros, either starting out empty or taken over from another parser.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);     # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = oInheritMacrosFrom.oReMacros;         # type: re
    else:
        self.dMacros    = {}    # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.
    sReIdentifier        = r'^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName    = re.compile(sReIdentifier);
    self.oReMnemonic     = re.compile(sReIdentifier);
    self.oReStatsName    = re.compile(sReIdentifier);
    self.oReFunctionName = re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName    = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum      = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable     = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment      = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2  = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3  = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReMcBeginEnd   = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\(');     ##> Not DEFER_TO_CIMPL_0_RET!

    # Debug switches.
    self.fDebug          = True;
    self.fDebugMc        = False;
    self.fDebugPreproc   = False;

    # Tag name -> handler method.
    dHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        #@opfltest:     Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        #@opflmodify:   Lists all EFLAGS modified. Includes @opflset, @opflclear and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        #@opflset:      Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        #@opflclear:    Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        #@opflundef:    List of flag documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        #@opflclass:    Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both the '@optestN' and the '@optest[N]' spelling.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            dHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;
    self.dTagHandlers = dHandlers;

    # Errors collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
3708
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current source position.

    The exception text is sMessage prefixed with self.sSrcFile and
    self.iLine on the form 'file:line: error: message'.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3714
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine   = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,);
    raise ParserException(sFullMessage);
3720
def error(self, sMessage):
    """
    Queues an error message for the current line (self.iLine).

    The message is appended to self.asErrors instead of being raised.
    Returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3728
def errorOnLine(self, iLine, sMessage):
    """
    Queues an error message for an explicitly given line number.

    The message is appended to self.asErrors.
    Returns False, so callers can write 'return self.errorOnLine(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3736
def errorComment(self, iLineInComment, sMessage):
    """
    Queues an error message for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.
    Returns False, so callers can write 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry     = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3744
def printErrors(self):
    """
    Writes the queued error messages to stderr.

    Nothing is written when no errors are queued.
    Returns the number of queued errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3753
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix when self.fDebug
    is set, otherwise does nothing.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3760
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced
    by a single space.

    If a comment opener or closer is still present afterwards, an error is
    queued via self.error, as that indicates a multi-line comment which
    is not handled here.  The stripped line is returned in either case.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3771
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the table declaration line; the table name is the identifier
    preceding the '[' on it.  The table body is read from self.asLines,
    starting at self.iLine.

    For every entry that does not start with 'iemOp_Invalid', the
    instructions registered for that function name are looked up in
    g_dAllInstructionsByFunction, and the opcode byte and prefix implied by
    the entry's position are either matched against or filled into one of
    them.  If none fits, a copy of the first one is registered for the
    position.

    Returns True when the closing '}' / '};' is reached with the expected
    number of entries.  Returns False (after queueing an error) if the line
    following the declaration is not a lone '{', if the entry count is
    wrong, or if the file ends before the table does.

    Note!  Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    # An explicit [256] or [32] array size selects the one-entry-per-byte layout.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps the indexes still to be visited valid
        # while entries are inserted after, or deleted at, position i.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # One IEMOP_X4(x) stands for four consecutive 'x' entries:
                # three extra copies here, plus the one stored at i below.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                # Drop empty entries (e.g. what follows a trailing comma).
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            # The closing brace ends the table; check the entry count.
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        # Accept the first instruction that either matches
                        # this opcode/prefix already, or has the missing
                        # part(s) unset so they can be filled in here.
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No fit: register a copy of the first instruction
                        # for this opcode/prefix position.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries occupy a table slot too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3877
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction and makes it a current instruction.

    The instruction is created for self.sSrcFile at iLine, or at
    self.iLine when iLine is None.  It is appended to both
    g_aoAllInstructions and self.aoCurInstrs.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3886
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Does nothing when oInstr already has a mnemonic.  Otherwise sStats is
    split on '_': the first word, lower-cased, becomes the mnemonic.  If the
    instruction has no operands yet, each remaining word is looked up in
    g_kdOpTypes and appended as an Operand.

    Returns True on success, False when an operand type word is unknown
    (operands appended before the unknown word are kept).
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asOpTypes = asParts[1:];
    if asOpTypes and not oInstr.aoOperands:
        for sOpType in asOpTypes:
            if sOpType not in g_kdOpTypes:
                # Unknown operand type; no error is queued for this.
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3902
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line of oInstr and then fills in
    whatever is still missing, in this order: mnemonic and operands, the
    disassembler enum, the statistics name, the function name, the maps and
    the encoding.  Finally the instruction is added to each of its maps and
    to the global by-stat and by-function dictionaries.

    A duplicate statistics name is reported via self.error, but does not
    change the return value.  Always returns True.
    """
    # Each instruction may only be completed once.
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name (minus the 'iemOp_' prefix) is the fallback source.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name: preferably from the function
    # name, otherwise from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types,
    # falling back on the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    # Note: the encoding stays None when there are operands and the first
    #       one does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if    (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
               or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction registered under a name is kept on duplicates.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
4008
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the current state.

    Each instruction in self.aoCurInstrs is passed to doneInstructionOne
    together with the completion line: self.iLine, or when iLineInComment
    is given, self.iCommentLine + iLineInComment.  The stub and
    instruction totals are updated, and the current comment and instruction
    list are cleared.

    When fEndOfFunction is set, the current function (if any) is completed
    with its source lines, and the function state is reset as well.

    Always returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];
    if not fEndOfFunction:
        return True;

    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
4029
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced; any other existing value (empty strings included) is kept.
    """
    for oInstr in self.aoCurInstrs:
        # The old value is only consulted when not overwriting.
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4041
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the sAttrib array of all current instructions
    to oValue.

    The array is padded with None entries as needed to make iEntry valid.
    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
4053
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, everything after 'Opcode' is stored
    (space separated, with '0X' prefixes normalized to '0x') in the
    sRawOldOpcodes attribute of the current instructions, unless already
    set.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word, keeping the opcode bytes and
        # whatever qualifiers follow them.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
4069
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    If there is no current instruction, one is added for the tag line
    (self.iCommentLine + iTagLine).  The tag count of every current
    instruction is incremented, and self.cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
4079
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    The lines of each section are stripped and joined with a single space.
    Empty sections are skipped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if asLines];
4091
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of each section are stripped and joined by sLineSep, and the
    resulting section strings are joined by sSectionSep.  Empty sections
    are skipped entirely, so they never contribute a separator - neither a
    leading one when the first section is empty, nor a doubled one in the
    middle.
    """
    asSections = [sLineSep.join([sLine.strip() for sLine in asLines])
                  for asLines in aasSections
                  if asLines];
    return sSectionSep.join(asSections);
4109
4110
4111
4112 ## @name Tag parsers
4113 ## @{
4114
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, given a terminating '.' if it lacks one, and
    checked to be at most 180 characters and a single sentence before it is
    stored in oInstr.sBrief.

    Returns True on success.  Returns False (with a comment error queued)
    if the value is missing, too long, more than one sentence, or if the
    instruction already has a brief description.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that ends a sentence, i.e. one that is followed by
    # a space or ends the string.  Dots inside words/numbers are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4144
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to oInstr.asDescSections.  Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
4159
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True after storing the value in oInstr.sMnemonic.  Returns
    False (with a comment error queued) if the value does not match
    self.oReMnemonic or a mnemonic has already been set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out what, if anything, is wrong with it.
    sMnemonic = self.flattenAllSections(aasSections);
    sError    = None;
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic;
    return True;
4182
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.
    When omitted, the default location of the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.

    Returns True after storing the operand in oInstr.aoOperands (padding
    with None entries as needed).  Returns False (with a comment error
    queued) on a malformed value, an unknown type or location, or if the
    operand has already been specified.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        # Default location for this operand type.
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4232
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Whitespace around the individual map names is ignored, as are empty
    list entries.  Each name must be a key of g_dInstructionMaps.

    Returns True after appending the maps to oInstr.aoMaps.  Returns False
    (with a comment error queued) if no map is given, a map name is
    unknown, or a map is already assigned to the instruction.  A map listed
    twice in the value itself is reported, but does not fail the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4267
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case-insensitive.  'n/a' is stored as None, and a two
    character value gets a '0x' prefix prepended before validation.  Other
    than 'n/a', the value must be a key of g_kdPrefixes.

    Returns True after storing the prefix in oInstr.sPrefix.  Returns False
    (with a comment error queued) if the value is missing, consists of more
    than one word, is not a valid prefix, or a prefix has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4306
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted values are anything _isValidOpcodeByte accepts, and the forms
    '/r', '11/r' and '!11/r', where r is a single digit in the range 0..7
    (the ModR/M reg field is three bits wide).

    Returns True after storing the value in oInstr.sOpcode.  Returns False
    (with a comment error queued) if the value is invalid or an opcode has
    already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sLead in ('/', '11/', '!11/'):
            if    len(sOpcode) == len(sLead) + 1 \
              and sOpcode.startswith(sLead) \
              and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4336
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored in
    oInstr.sSubOpcode is the first element of the table entry.

    Returns True on success.  Returns False (with a comment error queued)
    if the value is unknown or a sub opcode has already been set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub opcode table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        sValid = ', '.join(sorted(g_kdSubOpcodes.keys()));
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sValue, sValid,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to overwrite an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));

    oInstr.sSubOpcode = sSubOpcode;
    return True;
4366
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or something _isValidOpcodeByte accepts.

    Returns True after storing the value in oInstr.sEncoding.  Returns
    False (with a comment error queued) if the value is invalid or an
    encoding has already been set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown = (   sEncoding in g_kdEncodings
              or sEncoding in g_dInstructionMaps
              or _isValidOpcodeByte(sEncoding));
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to overwrite an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
4393
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags uses this to select the instruction attribute that
## receives the flag list given with the tag.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
4402
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (keys of
    g_kdEFlagsMnemonics), or the single word 'none' for an empty list.
    The list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag.

    Returns True on success.  Returns False (with comment errors queued)
    if any flag is invalid, or if the attribute already holds a non-empty
    list.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Set them, refusing to overwrite a non-empty list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
4434
## EFLAGS class definitions with their attribute lists.
## Keyed by class name (the @opflclass value).  Each entry maps the
## instruction EFLAGS attribute names to the flag lists that
## parseTagOpEFlagsClass assigns to them.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'incdec': {
        'asFlTest': [],
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear': [], # between IMUL and MUL.
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', 'of', ],
    },
    'bitmap': { # bt, btc, btr, bts
        'asFlTest': [],
        'asFlModify': [ 'cf', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest': [],
        'asFlModify': [],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Shorthand for setting all the EFLAGS attributes of the instruction
    from the self.kdEFlagsClasses entry named by the value.

    Returns True on success.  Returns False (with a comment error queued)
    if the class is unknown, or on the first attribute that is already set
    (attributes assigned before that one stay assigned).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look up the class.
    sClass    = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        sValid = ', '.join(sorted(self.kdEFlagsClasses.keys()));
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s  (valid: %s)'
                                           % (sTag, sClass, sValid,));

    # Set the attributes, refusing to overwrite any that are already set.
    for sAttrib in kdAttribs:
        asFlags = kdAttribs[sAttrib];
        asOld   = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));
        setattr(oInstr, sAttrib, asFlags);

    return True;
4561
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' gives an
    empty list.  Valid hints are added as keys to oInstr.dHints.

    Returns True on success; a hint that the instruction already has is
    reported, but does not fail the tag.  Returns False (with a comment
    error queued for each offender) if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() leaves no surrounding whitespace on the hints.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        asInvalid = [sHint for sHint in asHints if sHint not in g_kdHints];
        for sHint in asInvalid:
            self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if asInvalid:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4595
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.  The first word of the value must match self.oReDisEnum
    and the instruction must not already have a sDisEnum value.

    Returns True on success, False / the self.errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Split the flattened value into words; complain unless there is exactly
    # one, but carry on with the first word when there are several.
    asValues = self.flattenAllSections(aasSections).split();
    cValues  = len(asValues);
    if cValues != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
    if cValues == 0:
        return False;

    sNewEnum = asValues[0];
    if self.oReDisEnum.match(sNewEnum):
        if oInstr.sDisEnum is None:
            oInstr.sDisEnum = sNewEnum;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewEnum,));

    return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                       % (sTag, sNewEnum, self.oReDisEnum.pattern));
4624
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be exactly one word and a key of g_kdCpuNames.  When
    more than one word is given an error is reported and the first word is
    used.  An empty value is reported and rejected.  If the instruction
    already has a different minimum CPU, an error is reported and the
    existing value is kept.

    Returns True when a valid CPU name was processed, False / the
    self.errorComment() result otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if len(asCpus) != 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
        if not asCpus:
            return False; # Nothing to work with (used to raise IndexError below).

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Validation above must not assign, or the conflict check below
    # can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4654
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    A single value of 'none' (any casing) means no CPUID requirement.
    Otherwise every value must be a key of g_kdCpuIdFlags; each invalid
    value is reported and nothing is added to the instruction if any value
    was invalid.  Values already on the instruction are reported as
    duplicates but do not fail the tag.

    Returns True on success, False if one or more values were invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Store the normalized value in the list (the original code
                    # indexed the string itself, which raises TypeError).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4688
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  The value must be a single word matching
    self.oReGroupName, and the instruction must not have a group yet.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asValues,));

    # Validate the name and make sure we are not replacing an earlier one.
    sNewGroup = asValues[0];
    if self.oReGroupName.match(sNewGroup):
        if oInstr.sGroup is None:
            oInstr.sGroup = sNewGroup;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));

    return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                       % (sTag, sNewGroup, self.oReGroupName.pattern));
4714
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be a single word that is a key of g_kdInvalidStyles and
    only one of the three tags may be given per instruction.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one known style is expected.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));
    sNewStyle = asValues[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace a style set by an earlier tag.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the three tags additionally raise a flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
4752
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is treated as one test.  The words of a
    section are scanned from right to left: everything after "->" is an
    output, everything between "/" and "->" is an input, and everything
    before "/" is a selector.  A section that parses cleanly is appended to
    oInstr.aoTests as an InstructionTest; otherwise errors are reported
    and the section is dropped.

    Always returns True (errors are reported via self.errorComment()).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs = [];
        asOutputs = [];
        asCur = asOutputs;
        fRc = True;
        asWords = sFlatSection.split();
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The state checks compare list
            # identity: comparing by value would treat two different lists
            # with equal contents (e.g. both still empty) as the same state
            # and let a repeated or misplaced switcher slip through.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found in the condition is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue = asSplit[0];
                        sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                   if asVal[1] in asValidFieldKinds]),));
                    break; # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if everything parsed, otherwise
        # report what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4897
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number embedded in the tag is compared with the number of tests
    the instruction already has; a mismatch is reported, but the tag is
    handed on to parseTagOpTest() either way.

    Returns whatever parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Cut the number out of the tag: '@optest[' is 8 chars, '@optest' is 7.
    if sTag[-1] == ']':
        sNumber = sTag[8:-1];
    else:
        sNumber = sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4913
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another: the
    handler accepts the tag and does nothing with it.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
4925
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid references and duplicates
    are reported and skipped.

    Returns True unless the value is empty, in which case the
    self.errorComment() result is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only the references are recorded here; the copy jobs are executed after
    # all the input has been parsed so forward references can be handled.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if not (self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference)):
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
            continue;
        oInstr.asCopyTests.append(sReference);

    return True;
4954
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already in there.

    See also @optestignore.

    Returns True on success, or the self.errorComment() result if the tag
    was given a value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if not sUnexpected:
        fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
        if not fAlreadyListed:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));
4977
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be a single word that is a key of g_kdXcptTypes, and the
    instruction must not have an exception type yet.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one known type is expected.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));
    sNewType = asValues[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Only one @opxcpttype per instruction.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
5004
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReFunctionName and the instruction must not
    have a function name yet.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    sNewName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewName):
        # Valid name; only accept it if none has been set before.
        if oInstr.sFunction is None:
            oInstr.sFunction = sNewName;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sNewName,));

    return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                       % (sTag, sNewName, self.oReFunctionName.pattern));
5032
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReStatsName and the instruction must not
    have a statistics name yet.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    sNewName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewName):
        # Valid name; only accept it if none has been set before.
        if oInstr.sStats is None:
            oInstr.sStats = sNewName;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sNewName,));

    return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                       % (sTag, sNewName, self.oReStatsName.pattern));
5060
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the self.doneInstructions() result, or the self.errorComment()
    result if the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5073
5074 ## @}
5075
5076
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is split into lines, which are grouped into '@tag' blocks.
    Text before the first tag is collected under the pseudo tag '@default'.
    Within a tag, empty lines separate sections.  Each tag found in
    self.dTagHandlers is passed to its handler as
    (sTag, aasSections, iTagLine, iEndLine).

    Returns False if the comment contains neither 'Opcode' nor '@' and was
    therefore skipped, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each leading empty line dropped advances self.iCommentLine by one.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0;                        # Number of tags handed to a handler.
    sFlatDefault = None;                # Flattened untagged ('@default') text, if any.
    sCurTag = '@default';
    iCurTagLine = 0;
    asCurSection = [];                  # Always the last element of aasSections.
    aasSections = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: non-empty lines extend the
            # current section, an empty line starts a new section (unless
            # the current one is still empty).
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section first.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            # Note: '@opencoding' starts with '@op' and is thus already caught
            #       by the 'Unknown tag' branch above unless it has a handler.
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The first word (lower cased) is the tag, any remaining text on
            # the line becomes the first line of the tag's first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop processing above, this does not include
    #               the two 'Did you mean ...' branches, and iLine is the index
    #               of the last line here.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5177
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is the text between offStartInvocation and the first
    '(' following it.  Arguments are split on commas at the outermost
    parenthesis level; commas and parentheses inside single or double
    quoted strings are ignored.  If the invocation is not closed within
    sInvocation, lines are appended from self.asLines, starting at index
    self.iLine, until it is.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # First the name.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Then the arguments, one character at a time.
    iNextLine   = self.iLine;       # Index of the next line to pull in from self.asLines.
    cNesting    = 1;                # Parenthesis depth, 1 = directly inside the invocation.
    offCur      = offParen + 1;
    offArgStart = offCur;           # Where the argument currently being scanned starts.
    offLineBase = 0;                # Offset in sInvocation where the last appended line starts.
    chOpenQuote = None;             # The quote character when inside a string, else None.
    while cNesting > 0:
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                break;
            offLineBase = offCur;
            sInvocation = sInvocation + self.asLines[iNextLine];
            iNextLine  += 1;
        chCur = sInvocation[offCur];

        if chOpenQuote:
            # Inside a string: skip escaped characters, watch for the end quote.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chOpenQuote:
                chOpenQuote = None;
        elif chCur == '"' or chCur == '\'':
            chOpenQuote = chCur;
        elif chCur == '(':
            cNesting += 1;
        elif chCur == ',' or chCur == ')':
            # An argument ends at a top-level comma or the closing parenthesis.
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
5234
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if the first non-blank character
    following the name is an opening parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Use lstrip().startswith() rather than strip()[0] so that a macro name
    # followed by nothing but whitespace (or nothing at all) counts as a
    # non-match instead of raising IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5244
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that only
    hands back the first element of its result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[0];
    return asArgs;
5250
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    for which findAndParseMacroInvocation() finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further lookups are made after a hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asHit for asHit in oHits if asHit is not None), None);
5260
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what the @op* tags have
    already set on the last instruction in self.aoCurInstrs (one is added
    if there is none), and any attributes not yet set are filled in from
    the macro.  Problems are reported via self.error().

    Parameters:
        sMacro      The macro name (only used in messages).
        sStats      The a_Stats argument, must match self.oReStatsName.
        sAsm        Not used.
        sForm       The a_Form argument, expected to be a key of g_kdIemForms.
        sUpper      The a_Upper argument (mnemonic in upper case).
        sLower      The a_Lower argument (mnemonic in lower case).
        sDisHints   '|' separated DISOPTYPE_XXX values, or '0'.
        sIemHints   '|' separated IEMOPHINT_XXX values, or '0'.  If it
                    contains IEMOPHINT_SKIP_PYTHON the invocation is ignored
                    after the basic mnemonic checks.
        asOperands  List of operand type names (keys of g_kdOpTypes).

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.  Everything depending on the g_kdIemForms entry is done in
    # the else branch so an unknown form only produces the one error.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # For 'V' operands the form must mention VEX or XOP.
                    # str.find() returns -1 (truthy) when there is no match
                    # and 0 (falsy) for a match at the start, so its result
                    # must be compared explicitly.
                    if (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ] and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V' and ( not (sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if oInstr.aoOperands[iOperand].sType != 'FIXED' \
                           and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if oInstr.sSubOpcode \
           and g_kdIemForms[sForm][2] \
           and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5400
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a stats name nor an assembly string, so both
    are derived from the lower case mnemonic and the operands before handing
    over to workerIemOpMnemonicEx(), whose result is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        # E.g. 'add' + ['Eb', 'Gb'] -> 'add_Eb_Gb' and 'add Eb,Gb'.
        sStats = '_'.join([sLower,] + list(asOperands));
        sAsm   = '%s %s' % (sLower, ','.join(asOperands),);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5410
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock, makes it the current one (self.oCurMcBlock),
    and links it to the last instruction in self.aoCurInstrs, if any.  The
    block is only added to g_aoMcBlocks when there is no preprocessor
    condition stack, no host architecture is set, or the conditionals on
    the stack apply to the host architecture.

    Raises a parser error (self.raiseError) when there is no current
    function or a MC block is already open.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Preconditions: must be inside a function and not inside another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line to the
    # statement; rfind returns -1 when sCode has no newline before the
    # statement, making the line start offset zero in that case.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    oInstruction = None;
    if self.aoCurInstrs:
        oInstruction = self.aoCurInstrs[-1];
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = oInstruction,
                               cchIndent = cchIndent);

    # But don't add it to the list unless the context matches the host architecture.
    try:
        if (   not self.aoCppCondStack
            or not self.sHostArch
            or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine)):
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if self.oCurMcBlock.oInstruction:
        self.oCurMcBlock.oInstruction.aoMcBlocks.append(self.oCurMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5450
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Splits a macro-expanded line into its "sub-lines".

    A line produced by macro expansion contains several embedded newlines.
    This helper (used by workerIemMcEnd and workerIemMcDeferToCImplXRet)
    cuts out the part from the sub-line holding offBegin up to and including
    the sub-line holding offEnd.  When the first sub-line does not start with
    sBeginStmt, everything preceding offBegin is dropped as well.

    Returns a list of newline terminated strings.  Note that the list ends
    with a lone newline entry when the extracted text itself ends with a
    newline.
    """
    # Chop off everything following the sub-line that offEnd is in.
    offEol = sRawLine.find('\n', offEnd);
    sChunk = sRawLine[:offEol + 1] if offEol > 0 else sRawLine;

    # Skip ahead to the sub-line that offBegin is in.
    offSubLine = sChunk.rfind('\n', 0, offBegin) + 1;
    sChunk     = sChunk[offSubLine:];

    # If something precedes the begin statement on that sub-line, drop that too.
    if not sChunk.strip().startswith(sBeginStmt):
        sChunk = sChunk[offBegin - offSubLine:];

    asResult = [];
    for sSubLine in sChunk.split('\n'):
        asResult.append(sSubLine + '\n');
    return asResult;
5471
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, trims whatever follows
    the 'IEM_MC_END();' statement on the last line, hands the lines to the
    block's complete() method and clears self.oCurMcBlock.

    offEndStatementInLine is the offset of the IEM_MC_END statement in the
    current line.

    Returns True.  Malformed input is reported via raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is made relative to the joined string by adding
                # the length of all lines preceding the last one.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd = offEndInFinal + len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by blanks.
    # The scanning is bounds checked so that a truncated statement produces a
    # proper parser error rather than an IndexError.
    for chExpected in '();':
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if sFinal[offFinalEnd : offFinalEnd + 1] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5550
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is turned into a complete, single statement McBlock (with
    fDeferToCImpl set) which is added to g_aoMcBlocks and to the current
    instruction, if there is one.

    sCode is the code snippet containing the statement,
    offBeginStatementInCodeStr the statement offset within it,
    offBeginStatementInLine the offset recorded in the block, and cParams
    the number in the macro name.

    Returns True.  Errors are reported via raiseError.  self.iLine is
    advanced when the invocation spans multiple lines.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # The helper returns a trailing blank entry when the extracted text ends
        # with a newline.  Drop such entries (like workerIemMcEnd does) so that
        # the last entry is the one holding the terminating semicolon.
        while len(asLines) > 1 and asLines[-1].strip() == '':
            asLines.pop();
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    # Note! The statement must be on the first line, but unlike for
    #       IEM_MC_BEGIN it is not required to be the first word on it.
    assert asLines[0].find(sStmt) >= 0;

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5618
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    Decoder functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed macro invocation, with the
    macro name as entry 0 and the function name as entry 1.

    Returns True.
    """
    # Wrap up the previous function first; it ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Then make the new function the current one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5634
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is a snippet of code (comments already cut away by the caller) and
    offLine is the offset of that snippet within the current line; it is
    added to the match offsets handed to the IEM_MC_XXX workers.

    Returns True if a function definition macro (FNIEMOP_XXX) was handled or
    if the code contains 'IEM_MC_', otherwise False.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to attach the function to and
            # complain if any of the current ones already has a function macro.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # Stubs have no body, so the instructions are done right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants imply the 'vex_l_zero' hint; complain if the
            # mnemonic macro (when seen) didn't give it and add it regardless.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    # NOTE(review): the nesting of the next call was reconstructed from a
                    #               flattened listing; confirm that it is not meant to be
                    #               inside the 'sStats is None' check above.
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # In all the invocations below asArgs[0] is the macro name and the
            # macro parameters follow in the order given in the comments.

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # Anything else is taken to be DEFER_TO_CIMPL_<n>..., with the
                    # single character following the prefix being the parameter count.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5778
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros from self.dMacros.  Call after changing the latter.

    The expression has one alternative per macro.  Macros with an argument
    list must be followed by an opening parenthesis, the others by a word
    boundary.  self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [sName + (r'\s*\(' if oMacro.asArgs is not None else r'\b')
                      for sName, oMacro in self.dMacros.items()];
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5799
def workerPreprocessorDefine(self, sRest):
    """
    Handles a #define directive, sRest being what follows the directive word.

    Only macros with IEM_MC_BEGIN or IEM_MC_END in the body are recorded in
    self.dMacros, all others are ignored.  self.iLine is advanced past any
    continuation lines.

    Returns True on success or when the macro is of no interest, otherwise
    the result of self.error().
    """
    assert sRest[-1] == '\n';

    #
    # Join continuation lines, dropping the escape character but keeping
    # the newlines.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;

    #
    # Split it into name, argument list and body.  Try the form with an
    # argument list first and fall back on the simple form.
    #
    asArgs = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # We're only interested in macros containing (parts of) MC blocks.  MC
    # blocks within nested macro expansion are NOT supported, and there is
    # only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        return True;

    #
    # Record the macro and refresh the matching expression.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreprocessorRecreateMacroRegex();
5860
def workerPreprocessorUndef(self, sRest):
    """
    Handles an #undef directive, sRest being what follows the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the
    result of workerPreprocessorRecreateMacroRegex.
    """
    # Crude comment stripping: Drop everything from the first slash on.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we are tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5879
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles the #if, #ifdef, #ifndef and #elif directives.

    sDirective is the directive word (without the hash) and sRest what
    follows it.  An #elif is added to the innermost open conditional, the
    others open a new one on self.aoCppCondStack.  self.iLine is advanced
    past any continuation lines.

    Returns True.  Errors are reported via raiseError.
    """
    fElif = sDirective == 'elif';

    #
    # An #elif requires an open conditional that hasn't seen its #else yet.
    #
    if fElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # Join continuation lines using a space, dropping both the escape
    # character and the newline.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + ' ' + self.asLines[self.iLine];
        self.iLine += 1;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    #
    # Create the conditional and put it where it belongs.
    #
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if fElif:
        self.aoCppCondStack[-1].aoElif.append(oCond);
    else:
        self.aoCppCondStack.append(oCond);
    return True;
5918
def workerPreprocessorElse(self):
    """
    Handles an #else directive by flagging the innermost open conditional.

    Returns True.  Errors are reported via raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');
    oInnermost = self.aoCppCondStack[-1];
    if oInnermost.fInElse:
        self.raiseError('Another #else after #else');

    oInnermost.fInElse = True;
    return True;
5930
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive by closing the innermost open conditional.

    Returns True.  Errors are reported via raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5940
def checkPreprocessorDirective(self, sLine):
    """
    Handles a line containing a preprocessor directive.

    The directive word is isolated and the remainder of the line passed on
    to the matching worker method.  The remainder starts with the last blank
    preceding the directive arguments.

    Returns the worker result, or False if the directive is of no interest.
    """
    cchLine = len(sLine);

    # Locate the hash and skip any blanks following it.
    offHash = sLine.find('#');
    assert offHash >= 0;
    offWord = offHash + 1;
    while offWord < cchLine and sLine[offWord].isspace():
        offWord += 1;

    # The directive word runs up to the next blank.
    offTail = offWord;
    while offTail < cchLine and not sLine[offTail].isspace():
        offTail += 1;
    sDirective = sLine[offWord:offTail];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Advance to the last blank before whatever comes after the directive.
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Dispatch.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5980
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation that oMatch found in sLine.

    We currently ASSUME there is only one invocation of a known macro in the
    line, and for macros with parameters that the whole invocation is on this
    line.

    Returns the line with the invocation replaced by the macro expansion.
    Errors are reported via raiseError.
    """
    #
    # Work out which macro it is.  A match ending with an opening
    # parenthesis means an invocation with arguments.
    #
    offMatch = oMatch.start();
    sName    = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # The simple case: No parameters.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Collect the arguments by scanning for the parenthesis closing the
    # invocation, splitting on commas that aren't inside nested parentheses.
    #
    cDepth  = 1;
    offScan = oMatch.end();
    offArg  = offScan;
    asArgs  = [];
    while cDepth > 0:
        if offScan >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offScan];
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
        if cDepth == 0 or (ch == ',' and cDepth == 1):
            asArgs.append(sLine[offArg:offScan].strip());
            offArg = offScan + 1;
        offScan += 1;
    # offScan is now the offset of the first character following the invocation.

    # An empty argument is how an empty parameter list comes out of the above.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offScan:];
6039
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are processed one by one, starting at
    self.iLine.  Known macros are expanded, preprocessor directives are
    passed to checkPreprocessorDirective, multi-line comments are collected
    and passed to parseComment, and code is passed to checkCodeForMacro.

    Returns number or errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Keep the expanded line, the MC block workers fetch their lines from self.asLines.
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                # Continue after the comment terminator.  Searching from offEnd
                                # itself would take the closing slash together with a directly
                                # following asterisk (like in '/*=*/*pu8') for a new comment.
                                offHit = sLine.find('/*', offEnd + 2);

                            if offHit >= 0:
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment     = '';
                                self.iCommentLine = self.iLine;
                                self.iState       = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                self.sComment += sLine[offLine:offHit];
                                self.iState    = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif (     self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine.startswith('}'):
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6145
# Sanity check: Every flag mentioned in the EFLAGS classes must be a known mnemonic.
def __sanityCheckEFlagsClasses():
    """
    Asserts that all flags listed in SimpleParser.kdEFlagsClasses are
    present in g_kdEFlagsMnemonics.
    """
    dClasses = SimpleParser.kdEFlagsClasses;
    for sClass in dClasses:
        dLists = dClasses[sClass];
        for sAttrib in dLists:
            for sFlag in dLists[sAttrib]:
                assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);
__sanityCheckEFlagsClasses();
6153
## The parsed content of IEMAllInstCommonBodyMacros.h.
# Set by __parseFileByName the first time it is called and passed on to the
# parsers of the other files.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6156
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too, so that the
    macros from it can be used when processing this and the following files.
    It is looked for next to sSrcFile first and then next to this script.

    sDefaultMap and sHostArch are passed on to the SimpleParser constructor.

    Returns a tuple with the parse() result (error count) and the parser
    instance.
    Raises exception if a file cannot be opened or read, or on parser errors.
    """
    global g_oParsedCommonBodyMacros;

    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file should only define macros, so finding any
        # instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The values must be given in the order the message labels them:
                #       instr, tags, stubs and MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6218
6219
def __doTestCopying():
    """
    Carries out the asCopyTests instructions.

    Each entry names a source instruction, either by statistics name or by
    function name, whose tests are appended to those of the instruction
    holding the entry.  Problems are written to stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or []):
            # Try the statistics name first, then fall back on the function name.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6247
6248
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, every other instruction that currently has
    tests gets its aoTests replaced by an empty list.  When it is empty,
    nothing is touched.

    Returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6260
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a tuple of: ( file name, default map name, file set number, ).
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h',     'one',       -1, ),
    ( 'IEMAllInstOneByte.cpp.h',    'one',        1, ),
    ( 'IEMAllInst3DNow.cpp.h',      '3dnow',      2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h',  'two0f',      2, ),
    ( 'IEMAllInstThree0f38.cpp.h',  'three0f38',  3, ),
    ( 'IEMAllInstThree0f3a.cpp.h',  'three0f3a',  3, ),
    ( 'IEMAllInstVexMap1.cpp.h',    'vexmap1',    4, ),
    ( 'IEMAllInstVexMap2.cpp.h',    'vexmap2',    4, ),
    ( 'IEMAllInstVexMap3.cpp.h',    'vexmap3',    4, ),
);
6273
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs.
    A filename without any directory component that does not exist as given
    is looked up in the directory containing this script instead.

    After parsing, the @opcopytests references are resolved and the
    only-test filter is applied; a stub statistics line goes to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure (non-zero accumulated error count).
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Nothing parsed (e.g. empty file list) must not blow up the debug
    # statistics with a division by zero; report 0% in that case.
    uPctStubs = cTotalStubs * 100 // cTotalInstr if cTotalInstr else 0;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (uPctStubs, cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6304
6305
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    Each file is classified by comparing its lower-cased base name with the
    file names in g_aaoAllInstrFilesAndDefaultMapAndSet; the first matching
    entry supplies the default map.  The actual work is then handed to
    __parseFilesWorker.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBasename = os.path.basename(sFilename).lower();
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet
                     if aoInfo[0].lower() == sBasename),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6327
6328
def parseAll(sHostArch = None):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files and their default maps are taken from the first two columns of
    g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append(aoInfo[:2]);
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6337
6338
6339#
6340# Generators (may perhaps move later).
6341#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  It consists of the format
    string, the parser index columns, the opcode enum, the operand parameter
    columns and the DISOPTYPE_ flags, padded out to fixed column offsets.

    Returns the entry as a single line of text.
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro, cMaxOperands = ('OPVEX', 4);
    else:
        sMacro, cMaxOperands = ('OP', 3);
    assert cOperands <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operands.
    #
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%': ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();
        asOperandFmts.append(sOperandFmt);

    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    iStart    = len(asColumns);
    sEncoding = oInstr.sEncoding;
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert cOperands >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asColumns.extend(['0,'] * cOperands);
    elif sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,');
    elif sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,');
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parsers not handled here yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #   IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8,
        #   IDX_ParseGrp9, IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14,
        #   IDX_ParseGrp15, IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5,
        #   IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # not already covered by a decoder column above, then pad with zeros.
    cDecoded = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in oInstr.aoOperands[cDecoded:]]);
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: only the hints that map onto DISOPTYPE_ defines, or 0 if none.
    #
    asFlags = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())
               if g_kdHints[sHint].startswith('DISOPTYPE_')];
    asColumns.append((' | '.join(asFlags) or '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its offset, or
    # after a single space if the preceding text already ran past it.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        sLine = sLine.ljust(offColumn) if len(sLine) < offColumn else sLine + ' ';
        sLine += sColumn;

    # Example of the result and the macro consuming it:
    #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #      DISOPTYPE_HARMLESS),
    #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6466
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the table can be shortened by trimming the None entries
    at its start and end.

    Returns (iInstr, cInstructions, fShortTable).  For a short table iInstr
    is the index of the first used entry and cInstructions is one past the
    index of the last used entry (both 0 if there are no used entries).
    Otherwise the result is (0, len(aoTableOrdered), False).
    """
    cTotal = len(aoTableOrdered);

    # One past the last entry that is in use (0 if none is).
    iEnd = next((i + 1 for i in range(cTotal - 1, -1, -1) if aoTableOrdered[i] is not None), 0);

    # The first entry that is in use (iEnd if none is).
    iFirst = next((i for i in range(iEnd) if aoTableOrdered[i] is not None), iEnd);

    # Keep the full table unless the number of trimmed entries reaches one
    # thirtieth of the table size (integer division).
    cTrimmed = iFirst + cTotal - iEnd;
    if cTrimmed < cTotal // 30:
        _ = oMap; # Use this for overriding.
        return (0, cTotal, False);

    return (iFirst, iEnd, True);
6488
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and then, for each instruction map
    whose name starts with "vex", writes a DISOPCODE table, an
    AssertCompile on its size and a DISOPMAPDESC range record to oDstFile
    (any object with a write() method; defaults to stdout).

    Returns exit code: 0 on success, 1 if parsing failed (the error and a
    traceback are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by 'byte+pfx' are split up into one map per prefix.
    #
    aoDisasmMaps = [];
    for _sName, oMap in sorted(g_dInstructionMaps.items(),
                               key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the matching extern declarations, but
    #       nothing in this function writes them out yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte; # One output row covers 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append('const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table that doesn't start on a row boundary gets a comment
        # giving the index of its first entry.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid.  Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries; the loop's += 1 below skips the last one.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip exactly
                        # that many (was a hard-coded 3, only right for 4 entries).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this passes an empty list to the entry formatter,
                    # which reads instruction attributes from its argument - confirm
                    # what an empty list is supposed to produce here.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must name the same symbol as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines, followed by a blank line.
        #
        oDstFile.write('\n'.join(asLines) + '\n\n');
    return 0;
6617
# When run as a script: generate the disassembler tables to the default
# destination (stdout) and use the function's return value as exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6620
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette