VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103922

Last change on this file since 103922 was 103921, checked in by vboxsync, 9 months ago

VMM/IEM: Fixed bug in vblendvps/d when working with 256-bit memory ops. Addressed the mnemonic todos. bugref:9898

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 323.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103921 2024-03-19 15:38:20Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103921 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
if sys.version_info >= (3,):
    # Python 3 has no separate 'long' type; alias the name to int so code calling long() keeps working.
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
g_kdX86EFlagsConstants = {
    # Individual EFLAGS bits, expressed as shifts (the bit number matches RT_BIT_32(n)).
    'X86_EFL_CF':          1 << 0,
    'X86_EFL_1':           1 << 1,
    'X86_EFL_PF':          1 << 2,
    'X86_EFL_AF':          1 << 4,
    'X86_EFL_ZF':          1 << 6,
    'X86_EFL_SF':          1 << 7,
    'X86_EFL_TF':          1 << 8,
    'X86_EFL_IF':          1 << 9,
    'X86_EFL_DF':          1 << 10,
    'X86_EFL_OF':          1 << 11,
    'X86_EFL_IOPL':        (1 << 12) | (1 << 13),  # Two bit wide field.
    'X86_EFL_NT':          1 << 14,
    'X86_EFL_RF':          1 << 16,
    'X86_EFL_VM':          1 << 17,
    'X86_EFL_AC':          1 << 18,
    'X86_EFL_VIF':         1 << 19,
    'X86_EFL_VIP':         1 << 20,
    'X86_EFL_ID':          1 << 21,
    # Composite masks.
    'X86_EFL_LIVE_MASK':   0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    1 << 1,                 # Same bit as X86_EFL_1.
};
90
91## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value).  A leading '!' means the flag is clear.
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): the common debugger convention is PE = PF set and PO = PF clear, i.e. the
    #               opposite of this mapping.  Existing annotations may rely on the mapping as
    #               it stands - confirm against the users before changing it.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value).
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
138## Constants and values for CR0.
g_kdX86Cr0Constants = {
    # CR0 bits, expressed as shifts (the bit number matches RT_BIT_32(n)).
    'X86_CR0_PE':   1 << 0,
    'X86_CR0_MP':   1 << 1,
    'X86_CR0_EM':   1 << 2,
    'X86_CR0_TS':   1 << 3,
    'X86_CR0_ET':   1 << 4,
    'X86_CR0_NE':   1 << 5,
    'X86_CR0_WP':   1 << 16,
    'X86_CR0_AM':   1 << 18,
    'X86_CR0_NW':   1 << 29,
    'X86_CR0_CD':   1 << 30,
    'X86_CR0_PG':   1 << 31,
};
152
153## Constants and values for CR4.
g_kdX86Cr4Constants = {
    # CR4 bits, expressed as shifts (the bit number matches RT_BIT_32(n)).
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
174
175## XSAVE components (XCR0).
g_kdX86XSaveCConstants = {
    # Individual component bits, expressed as shifts.
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    # Composite masks.
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
193## \@op[1-4] locations
g_kdOpLocations = {
    sLocation: [] for sLocation in (   # Each location gets its own empty list.
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv
        'imm8reg',  ## byte immediate with register specified in bits 4 thru 7 (vpblendvb, vblendvps, vblendvpd).

        # fixed registers.
        'AL',
        'rAX',
        'rDX',
        'CL',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',

        # fixed values.
        '1',
    )
};
219
220## \@op[1-4] types
221##
222## Value fields:
223## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
224## - 1: the location (g_kdOpLocations).
225## - 2: disassembler format string version of the type.
226## - 3: disassembler OP_PARAM_XXX (XXX only).
227## - 4: IEM form matching instruction.
228##
229## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Tuple layout: ( IDX_ParseXXX handler, location, disassembler format, OP_PARAM_XXX suffix, IEM form ).

    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',      '%Ap',   'Ap',       'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',       '%Eb',   'Eb',       'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',       '%Ed',   'Ed',       'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',       '%Ed',   'Ed',       'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',       '%Eq',   'Eq',       'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',       '%Eq',   'Eq',       'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',       '%Ew',   'Ew',       'RM',    ),
    'Ew_WO':        ( 'IDX_UseModRM',       'rm',       '%Ew',   'Ew',       'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',       '%Ev',   'Ev',       'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',       '%Ey',   'Ey',       'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',       '%Qd',   'Qd',       'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',       '%Qq',   'Qq',       'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',       '%Qq',   'Qq',       'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',       '%Wss',  'Wss',      'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',       '%Wss',  'Wss',      'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',       '%Wsd',  'Wsd',      'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',       '%Wsd',  'Wsd',      'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',       '%Wps',  'Wps',      'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',       '%Wps',  'Wps',      'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',       '%Wpd',  'Wpd',      'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',       '%Wpd',  'Wpd',      'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',       '%Wdq',  'Wdq',      'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',       '%Wdq',  'Wdq',      'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',       '%Wq',   'Wq',       'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',       '%Wq',   'Wq',       'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',       '%Wq',   'Wq',       'RM',    ),
    'Wqq':          ( 'IDX_UseModRM',       'rm',       '%Wqq',  'Wqq',      'RM',    ),
    'Wqq_WO':       ( 'IDX_UseModRM',       'rm',       '%Wqq',  'Wqq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',       '%Wx',   'Wx',       'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',       '%Wx',   'Wx',       'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',       '%Uq',   'Uq',       'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',       '%Uq',   'UqHi',     'REG'    ),
    'Uqq':          ( 'IDX_UseModRM',       'rm',       '%Uqq',  'Uqq',      'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',       '%Uss',  'Uss',      'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',       '%Uss',  'Uss',      'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',       '%Usd',  'Usd',      'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',       '%Usd',  'Usd',      'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',       '%Ux',   'Ux',       'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',       '%Qq',   'Nq',       'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',       '%Ma',   'Ma',       'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',       '%Mb',   'Mb',       'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',       '%Md',   'Md',       'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',       '%Md',   'Md',       'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',       '%Md',   'Md',       'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',       '%Mdq',  'Mdq',      'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',       '%Mdq',  'Mdq',      'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',       '%Mq',   'Mq',       'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',       '%Mq',   'Mq',       'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',       '%Mps',  'Mps',      'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',       '%Mpd',  'Mpd',      'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',       '%Mx',   'Mx',       'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',       '%Mx',   'Mx',       'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',       '%M',    'M',        'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',       '%M',    'M',        'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',      '%Gb',   'Gb',       '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',      '%Gw',   'Gw',       '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',      '%Gd',   'Gd',       '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',      '%Gv',   'Gv',       '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',      '%Gv',   'Gv',       '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',      '%Gy',   'Gy',       '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',      '%Pd',   'Pd',       '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',      '%Pd',   'PdZx',     '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',      '%Pq',   'Pq',       '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',      '%Pq',   'Pq',       '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',      '%Vd',   'Vd',       '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',      '%Vd',   'Vd',       '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',      '%Vd',   'Vd',       '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',      '%Vdq',  'Vdq',      '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',      '%Vss',  'Vss',      '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',      '%Vss',  'Vss',      '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',      '%Vss',  'Vss',      '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',      '%Vsd',  'Vsd',      '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',      '%Vsd',  'Vsd',      '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',      '%Vsd',  'Vsd',      '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',      '%Vps',  'Vps',      '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',      '%Vps',  'Vps',      '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',      '%Vpd',  'Vpd',      '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',      '%Vpd',  'Vpd',      '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',      '%Vq',   'Vq',       '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',      '%Vq',   'Vq',       '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',      '%Vdq',  'Vdq',      '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',      '%Vdq',  'VdqHi',    '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',      '%Vdq',  'VdqHi',    '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',      '%Vq',   'VqZx',     '',      ),
    'Vqq':          ( 'IDX_UseModRM',       'reg',      '%Vqq',  'Vqq',      '',      ),
    'Vqq_WO':       ( 'IDX_UseModRM',       'reg',      '%Vqq',  'Vqq',      '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',      '%Vx',   'Vx',       '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',      '%Vx',   'Vx',       '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',     '%By',   'By',       'V',     ),
    'Hdq':          ( 'IDX_UseModRM',       'vvvv',     '%Hdq',  'Hdq',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',     '%Hps',  'Hps',      'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',     '%Hpd',  'Hpd',      'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',     '%Hx',   'HssHi',    'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',     '%Hx',   'HsdHi',    'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',     '%Hq',   'Hq',       'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',     '%Hq',   'HqHi',     'V',     ),
    'Hqq':          ( 'IDX_UseModRM',       'vvvv',     '%Hqq',  'Hqq',      'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',     '%Hx',   'Hx',       'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',      '%Ib',   'Ib',       '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',      '%Iw',   'Iw',       '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',      '%Id',   'Id',       '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',      '%Iq',   'Iq',       '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',      '%Iv',   'Iv',       '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',      '%Iz',   'Iz',       '',      ), ##< o16: word, o32|o64:dword

    # Registers encoded as immediates.
    'Lx':           ( 'IDX_ParseImmByte',   'imm8reg',  '%Lx',   'Lx',       '',      ),

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',      '%Ob',   'Ob',       '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',      '%Ov',   'Ov',       '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',      '%Jb',   'Jb',       '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',      '%Jv',   'Jv',       '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',      '%eSI',  'Xb',       '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',      '%eSI',  'Xv',       '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',      '%eDI',  'Yb',       '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',      '%eDI',  'Yv',       '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS',   '%Fv',   'Fv',       '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',       'al',    'REG_AL',   '',      ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',       'cl',    'REG_CL',   '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',      '%eAX',  'REG_EAX',  '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',      '%eDX',  'REG_EDX',  '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',       'cs',    'REG_CS',   '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',       'ds',    'REG_DS',   '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',       'es',    'REG_ES',   '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',       'fs',    'REG_FS',   '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',       'gs',    'REG_GS',   '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',       'ss',    'REG_SS',   '',      ),

    # Fixed values.
    '1':            ( '',                   '1',        '1',     '1',        '',      ),
};
384
385# IDX_ParseFixedReg
386# IDX_ParseVexDest
387
388
389## IEMFORM_XXX mappings.
g_kdIemForms = {
    # Tuple layout: ( sEncoding, [ sWhere1, ... ], opcodesub ).
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],                    '',           ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],                    '11 mr/reg',  ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],                    '!11 mr/reg', ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],             '',           ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],             '11 mr/reg',  ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],             '!11 mr/reg', ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],                    '',           ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],                    '11 mr/reg',  ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],                    '!11 mr/reg', ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],             '',           ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],             '11 mr/reg',  ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],             '!11 mr/reg', ),
    'M':            ( 'ModR/M',     [ 'rm', ],                          '',           ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                          '',           ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                          '',           ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],                      '',           ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],                     '',           ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],                    '',           ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],                    '11 mr/reg',  ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],                    '!11 mr/reg', ),
    'R':            ( 'ModR/M',     [ 'reg', ],                         '',           ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],                    '',           ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                    '11 mr/reg',  ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                    '!11 mr/reg', ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],                    '',           ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                    '11 mr/reg',  ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                    '!11 mr/reg', ),
    'VEX_MRI':      ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],             '',           ),
    'VEX_MRI_REG':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],             '11 mr/reg',  ),
    'VEX_MRI_MEM':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],             '!11 mr/reg', ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                          ''            ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                          ''            ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                          ''            ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                         ''            ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],            '',           ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],            '11 mr/reg',  ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],            '!11 mr/reg', ),
    'VEX_RVMI':     ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],     '',           ),
    'VEX_RVMI_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],     '11 mr/reg',  ),
    'VEX_RVMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],     '!11 mr/reg', ),
    'VEX_RVMR':     ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm8reg' ], '',           ),
    'VEX_RVMR_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm8reg' ], '11 mr/reg',  ),
    'VEX_RVMR_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm8reg' ], '!11 mr/reg', ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],            '',           ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],            '11 mr/reg',  ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],            '!11 mr/reg', ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],             '',           ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],             '11 mr/reg',  ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],             '!11 mr/reg', ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],            '',           ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],            '11 mr/reg',  ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],            '!11 mr/reg', ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                   '',           ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                   '11 mr/reg',  ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                   '!11 mr/reg', ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],            '',           ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],            '11 mr/reg',  ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],            '!11 mr/reg', ),

    'FIXED':        ( 'fixed',      None,                               '',           ),
};
454
455## \@oppfx values.
g_kdPrefixes = {
    sPrefix: [] for sPrefix in (   # Each prefix gets its own empty list.
        'none',
        '0x66',
        '0xf3',
        '0xf2',
        '!0xf3',    # special case for bsf/tzcnt
    )
};
463
464## Special \@opcode tag values.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (   # Each tag value gets its own empty list.
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
473
474## Special \@opcodesub tag values.
475## The first value is the real value for aliases.
476## The second value is for bs3cg1.
g_kdSubOpcodes = {
    # Value layout: [ real (canonical) sub-opcode value, bs3cg1 value ].
    # Entries marked 'alias' resolve to another entry's canonical value; all others resolve to themselves.
    'none':                 [ None,                 '',    ],
    '11 mr/reg':            [ '11 mr/reg',          '',    ],
    '11':                   [ '11 mr/reg',          '',    ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',    ],
    '!11':                  [ '!11 mr/reg',         '',    ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',  ],
    'w=0':                  [ 'rex.w=0',            '',    ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ', ],
    'w=1':                  [ 'rex.w=1',            '',    ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',  ],
    'vex.l=1':              [ 'vex.l=1',            'L1',  ], # Was 'vex.l=0' (copy & paste slip); must resolve to itself.
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',  ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',  ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',  ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',  ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',    ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',    ],
};
496
497## Valid values for \@openc
g_kdEncodings = {
    # Each value is a one-element list holding the BS3CG1ENC_XXX name (None for prefixes).
    'ModR/M':       [ 'BS3CG1ENC_MODRM',     ], ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED',     ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None,                  ], ##< Prefix
};
505
506## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    sStyle: [] for sStyle in (   # Each style gets its own empty list.
        'immediate',                ##< CPU stops decoding immediately after the opcode.
        'vex.modrm',                ##< VEX+ModR/M, everyone.
        'intel-modrm',              ##< Intel decodes ModR/M.
        'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
        'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
    )
};
515
# CPU names, each mapped to an empty tuple.
g_kdCpuNames = {
    sCpuName: () for sCpuName in ( '8086', '80186', '80286', '80386', '80486', )
};
523
524## \@opcpuid
g_kdCpuIdFlags = {
    # Maps the feature name to the name of the matching X86_CPUID_XXX feature bit constant.
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three previously named the neighbouring CPUID.1:ECX bit (DTES64, CPLDS and TM2
    # respectively) rather than the feature itself.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD features.
    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
569
570## \@ophints values.
571# pylint: disable=line-too-long
g_kdHints = {
    # Hints with a disassembler DISOPTYPE_XXX equivalent.
    'invalid':                   'DISOPTYPE_INVALID',
    'harmless':                  'DISOPTYPE_HARMLESS',
    'controlflow':               'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':                 'DISOPTYPE_DANGEROUS',
    'portio':                    'DISOPTYPE_PORTIO',
    'privileged':                'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':                 'DISOPTYPE_INTERRUPT',
    'illegal':                   'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!

    # Hints without a DISOPTYPE_XXX equivalent (empty string).
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_one':                 '',                                    ##< VEX.L must be 1.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
612# pylint: enable=line-too-long
613
614## \@opxcpttype values (see SDMv2 2.4, 2.7).
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (   # Each exception type gets its own empty list.
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
649
650
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is a lower case opcode byte string, i.e. exactly
    four characters: '0x' followed by two lower case hex digits (e.g. '0x0f').

    Returns True if it is, False if not.
    """
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    sHexDigits = '0123456789abcdef';
    return sOpcode[2] in sHexDigits and sOpcode[3] in sHexDigits;
662
663
664class InstructionMap(object):
665 """
666 Instruction map.
667
668 The opcode map provides the lead opcode bytes (empty for the one byte
669 opcode map). An instruction can be member of multiple opcode maps as long
670 as it uses the same opcode value within the map (because of VEX).
671 """
672
673 kdEncodings = {
674 'legacy': [],
675 'vex1': [], ##< VEX or EVEX prefix with map select (mmmmm) = 1
676 'vex2': [], ##< VEX or EVEX prefix with map select (mmmmm) = 2
677 'vex3': [], ##< VEX or EVEX prefix with map select (mmmmm) = 3
678 'xop8': [], ##< XOP prefix with map select (mmmmm) = 8
679 'xop9': [], ##< XOP prefix with map select (mmmmm) = 9
680 'xop10': [], ##< XOP prefix with map select (mmmmm) = 10
681 };
682 ## Selectors.
683 ## 1. The first value is the number of table entries required by a
684 ## decoder or disassembler for this type of selector.
685 ## 2. The second value is how many entries per opcode byte if applicable.
686 kdSelectors = {
687 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
688 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
689 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
690 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
691 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
692 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
693 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
694 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
695 };
696
697 ## Define the subentry number according to the Instruction::sPrefix
698 ## value for 'byte+pfx' selected tables.
699 kiPrefixOrder = {
700 'none': 0,
701 '0x66': 1,
702 '0xf3': 2,
703 '0xf2': 3,
704 };
705
def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
             sEncoding = 'legacy', sDisParse = None):
    """
    Initializes an empty instruction map.

    sName is the map name and sIemName the IEM name for it (may be None).
    asLeadOpcodes lists the lead opcode bytes as lower case hex strings
    like '0x0f'; None is treated as an empty list.  sSelector must be a key
    in kdSelectors and sEncoding a key in kdEncodings.  sDisParse, when
    given, must start with 'IDX_Parse'.

    Invalid arguments trigger assertions.
    """
    assert sSelector in self.kdSelectors;
    assert sEncoding in self.kdEncodings;
    if asLeadOpcodes is not None:
        for sLeadByte in asLeadOpcodes:
            assert _isValidOpcodeByte(sLeadByte);
    else:
        asLeadOpcodes = [];
    assert sDisParse is None or sDisParse.startswith('IDX_Parse');

    self.sName          = sName;
    self.sIemName       = sIemName;
    self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
    self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
    self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
    self.aoInstructions = []                # type: List[Instruction]
    self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.
724
725 def copy(self, sNewName, sPrefixFilter = None):
726 """
727 Copies the table with filtering instruction by sPrefix if not None.
728 """
729 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
730 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
731 else self.sSelector,
732 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
733 if sPrefixFilter is None:
734 oCopy.aoInstructions = list(self.aoInstructions);
735 else:
736 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
737 return oCopy;
738
def getTableSize(self):
    """
    Returns the number of table entries, i.e. the first value of the
    kdSelectors entry for this map's selector.
    """
    aiSelectorInfo = self.kdSelectors[self.sSelector];
    return aiSelectorInfo[0];
744
def getEntriesPerByte(self):
    """
    Returns the number of table entries per opcode byte, i.e. the second
    value of the kdSelectors entry for this map's selector.

    This only really makes sense for the 'byte' and 'byte+pfx' selectors,
    for the others it will just return 1.
    """
    aiSelectorInfo = self.kdSelectors[self.sSelector];
    return aiSelectorInfo[1];
753
def getInstructionIndex(self, oInstr):
    """
    Returns the table index for the instruction.

    The index is derived from oInstr.getOpcodeByte() according to the map
    selector (self.sSelector):
        - 'byte':       The opcode byte itself.
        - 'byte+pfx':   Opcode byte * 4 + the kiPrefixOrder value for oInstr.sPrefix.
        - '/r':         Bits 3 thru 5 (modrm.reg).
        - 'mod /r':     Bits 3 thru 7 (modrm.reg and modrm.mod).
        - 'memreg /r':  Bits 3 thru 5, plus 8 if bits 6 and 7 are both set (modrm.mod == 3).
        - '!11 /r':     Bits 3 thru 5; asserts that modrm.mod != 3.
        - '11 /r':      Bits 3 thru 5; asserts that modrm.mod == 3.
        - '11':         Bits 0 thru 5 (modrm.reg and modrm.rm); asserts that modrm.mod == 3.

    Asserts on an unknown selector (returns -1 if assertions are disabled).
    """
    bOpcode   = oInstr.getOpcodeByte();
    sSelector = self.sSelector;

    # The byte selectors are simple.  We need a full opcode byte and need just return it.
    if sSelector == 'byte':
        assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
        return bOpcode;

    # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
    if sSelector == 'byte+pfx':
        assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
        assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
        return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);

    # The other selectors needs masking and shifting.
    if sSelector == '/r':
        return (bOpcode >> 3) & 0x7;

    if sSelector == 'mod /r':
        return (bOpcode >> 3) & 0x1f;

    if sSelector == 'memreg /r':
        return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

    # Note! The mod field occupies the top two bits, so mod == 3 means the masked value is 0xc0.
    #       (The checks used to compare against 0xc, which a value masked with 0xc0 can never equal.)
    fMod3 = (bOpcode & 0xc0) == 0xc0;

    if sSelector == '!11 /r':
        assert not fMod3, str(oInstr);
        return (bOpcode >> 3) & 0x7;

    if sSelector == '11 /r':
        assert fMod3, str(oInstr);
        return (bOpcode >> 3) & 0x7;

    if sSelector == '11':
        assert fMod3, str(oInstr);
        return bOpcode & 0x3f;

    assert False, sSelector;
    return -1;
795
def getInstructionsInTableOrder(self):
    """
    Returns the instructions of this map arranged in table order.

    The result is a list with getTableSize() entries.  An entry is None
    when no instruction with an opcode maps to that index, a single
    instruction when exactly one does, and a list of instructions when
    several share the index (e.g. when a prefix like REX.W, or the CPU,
    selects between different instructions in the same place).

    Instructions without an opcode are left out.
    """
    cEntries = self.getTableSize();
    aoSlots  = [None] * cEntries;

    for oInstr in self.aoInstructions:
        if not oInstr.sOpcode:
            continue;

        idxSlot = self.getInstructionIndex(oInstr);
        assert idxSlot < cEntries, str(idxSlot);

        oCurrent = aoSlots[idxSlot];
        if oCurrent is None:
            aoSlots[idxSlot] = oInstr;              # First one for this slot.
        elif isinstance(oCurrent, list):
            oCurrent.append(oInstr);                # Third or later: extend the list.
        else:
            aoSlots[idxSlot] = [oCurrent, oInstr];  # Second one: convert to a list.

    return aoSlots;
827
828
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one part per underscore separated
    word of the map name: 'm' and 'r' as well as two digit lowercase hex
    words are appended with a leading underscore, all other words are
    appended with the first letter uppercased after turning 'grp' into
    'Grp' and 'map' into 'Map'.
    """
    asParts = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        if sWord in ('m', 'r'):     # suffixes indicating modrm.mod==mem and modrm.mod==reg
            asParts.append('_' + sWord);
        elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
            asParts.append('_' + sWord);
        else:
            sWord = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sWord[0].upper() + sWord[1:]);
    return ''.join(asParts);
846
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with 'g_aDisas'
    replaced by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
852
def isVexMap(self):
    """ Returns True if the encoding of this map starts with 'vex', otherwise False. """
    sEncoding = self.sEncoding;
    return sEncoding.startswith('vex');
856
857
class TestType(object):
    """
    Test value type.

    This base class handles integer like values given as decimal or '0x'
    prefixed hexadecimal strings.  The fUnsigned constructor parameter sets
    the default stance on zero vs sign extending.  Prefixing a value with
    '+' or '-' requests sign extension regardless of that default.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception, the message is also available as sMessage. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## Maps each lowercase hex digit to its bitwise inverse (ascii ~ operator).
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue to the shortest normal sized little endian byte
        representation.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        implementation always returns the single entry form.

        Raises BadValue if the value is empty or cannot be converted.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts where the '0x' prefix is expected.
        fSignExtend = not self.fUnsigned;
        offPrefix   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offPrefix   = 1;
        fHex = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Convert to a long integer.
        # NOTE(review): relies on 'long' being resolvable here (builtin on Python 2) - confirm the Python 3 shim.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits.  Negative values are converted from -n - 1 with
        # every digit inverted, which yields the two's complement digits.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits, or else a multiple of the largest one.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad on the left with zeros or, for negative values, with 'f' digits.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Convert to a bytearray, least significant byte first, and return it.
        abValue = bytearray([int(sHex[off : off + 2], 16) for off in range(len(sHex) - 2, -1, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if get() accepts the value, the error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair, which it never is for this type.
        """
        _ = sValue;
        return False;
982
983
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  Mnemonics mapping to a constant with a leading '!'
    denote flags to clear, the others flags to set.
    """

    ## The mnemonics that denote a cleared flag (dictionary for speed; dummy value).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics to byte form.

        Returns ((fSignExtend, abSet), ) when no flag is to be cleared,
        otherwise the pair ((fSignExtend, abClear), (fSignExtend, abSet)).

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the flag clearing mnemonics.

        The check is done per comma separated flag, the same way get()
        splits the value.  A plain substring search would also hit flag
        names that merely contain one of the mnemonics and then trip the
        consistency assertions in get().
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
1019
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (used for CR0, CR4 and XCR0).

    Values are comma separated flag names.  Each one is uppercased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues.
    """

    ## Not referenced by this class itself.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names to byte form by ORing
        the flag values together.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant  = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlagValue;
        return TestType.get(self, '0x%x' % (fCombined,));
1040
1041
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each modifier consists of a field name (see kdFields), an assignment
    operator (see kasOperators), a value string and a type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is constructed with the default fUnsigned=True, same as 'uint' - confirm this is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name: ( default type, [both|input|output], )
        # Operands.
        'op1': ( 'uint', 'both', ), ## \@op1
        'op2': ( 'uint', 'both', ), ## \@op2
        'op3': ( 'uint', 'both', ), ## \@op3
        'op4': ( 'uint', 'both', ), ## \@op4
        # Flags.
        'efl': ( 'efl', 'both', ),
        'efl_undef': ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al': ( 'uint', 'both', ),
        'cl': ( 'uint', 'both', ),
        'dl': ( 'uint', 'both', ),
        'bl': ( 'uint', 'both', ),
        'ah': ( 'uint', 'both', ),
        'ch': ( 'uint', 'both', ),
        'dh': ( 'uint', 'both', ),
        'bh': ( 'uint', 'both', ),
        'r8l': ( 'uint', 'both', ),
        'r9l': ( 'uint', 'both', ),
        'r10l': ( 'uint', 'both', ),
        'r11l': ( 'uint', 'both', ),
        'r12l': ( 'uint', 'both', ),
        'r13l': ( 'uint', 'both', ),
        'r14l': ( 'uint', 'both', ),
        'r15l': ( 'uint', 'both', ),
        # 16-bit GPRs.
        'ax': ( 'uint', 'both', ),
        'dx': ( 'uint', 'both', ),
        'cx': ( 'uint', 'both', ),
        'bx': ( 'uint', 'both', ),
        'sp': ( 'uint', 'both', ),
        'bp': ( 'uint', 'both', ),
        'si': ( 'uint', 'both', ),
        'di': ( 'uint', 'both', ),
        'r8w': ( 'uint', 'both', ),
        'r9w': ( 'uint', 'both', ),
        'r10w': ( 'uint', 'both', ),
        'r11w': ( 'uint', 'both', ),
        'r12w': ( 'uint', 'both', ),
        'r13w': ( 'uint', 'both', ),
        'r14w': ( 'uint', 'both', ),
        'r15w': ( 'uint', 'both', ),
        # 32-bit GPRs.
        'eax': ( 'uint', 'both', ),
        'edx': ( 'uint', 'both', ),
        'ecx': ( 'uint', 'both', ),
        'ebx': ( 'uint', 'both', ),
        'esp': ( 'uint', 'both', ),
        'ebp': ( 'uint', 'both', ),
        'esi': ( 'uint', 'both', ),
        'edi': ( 'uint', 'both', ),
        'r8d': ( 'uint', 'both', ),
        'r9d': ( 'uint', 'both', ),
        'r10d': ( 'uint', 'both', ),
        'r11d': ( 'uint', 'both', ),
        'r12d': ( 'uint', 'both', ),
        'r13d': ( 'uint', 'both', ),
        'r14d': ( 'uint', 'both', ),
        'r15d': ( 'uint', 'both', ),
        # 64-bit GPRs.
        'rax': ( 'uint', 'both', ),
        'rdx': ( 'uint', 'both', ),
        'rcx': ( 'uint', 'both', ),
        'rbx': ( 'uint', 'both', ),
        'rsp': ( 'uint', 'both', ),
        'rbp': ( 'uint', 'both', ),
        'rsi': ( 'uint', 'both', ),
        'rdi': ( 'uint', 'both', ),
        'r8': ( 'uint', 'both', ),
        'r9': ( 'uint', 'both', ),
        'r10': ( 'uint', 'both', ),
        'r11': ( 'uint', 'both', ),
        'r12': ( 'uint', 'both', ),
        'r13': ( 'uint', 'both', ),
        'r14': ( 'uint', 'both', ),
        'r15': ( 'uint', 'both', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax': ( 'uint', 'both', ),
        'oz.rdx': ( 'uint', 'both', ),
        'oz.rcx': ( 'uint', 'both', ),
        'oz.rbx': ( 'uint', 'both', ),
        'oz.rsp': ( 'uint', 'both', ),
        'oz.rbp': ( 'uint', 'both', ),
        'oz.rsi': ( 'uint', 'both', ),
        'oz.rdi': ( 'uint', 'both', ),
        'oz.r8': ( 'uint', 'both', ),
        'oz.r9': ( 'uint', 'both', ),
        'oz.r10': ( 'uint', 'both', ),
        'oz.r11': ( 'uint', 'both', ),
        'oz.r12': ( 'uint', 'both', ),
        'oz.r13': ( 'uint', 'both', ),
        'oz.r14': ( 'uint', 'both', ),
        'oz.r15': ( 'uint', 'both', ),
        # Control registers.
        'cr0': ( 'cr0', 'both', ),
        'cr4': ( 'cr4', 'both', ),
        'xcr0': ( 'xcr0', 'both', ),
        # FPU Registers
        'fcw': ( 'uint', 'both', ),
        'fsw': ( 'uint', 'both', ),
        'ftw': ( 'uint', 'both', ),
        'fop': ( 'uint', 'both', ),
        'fpuip': ( 'uint', 'both', ),
        'fpucs': ( 'uint', 'both', ),
        'fpudp': ( 'uint', 'both', ),
        'fpuds': ( 'uint', 'both', ),
        'mxcsr': ( 'uint', 'both', ),
        'st0': ( 'uint', 'both', ),
        'st1': ( 'uint', 'both', ),
        'st2': ( 'uint', 'both', ),
        'st3': ( 'uint', 'both', ),
        'st4': ( 'uint', 'both', ),
        'st5': ( 'uint', 'both', ),
        'st6': ( 'uint', 'both', ),
        'st7': ( 'uint', 'both', ),
        # MMX registers.
        'mm0': ( 'uint', 'both', ),
        'mm1': ( 'uint', 'both', ),
        'mm2': ( 'uint', 'both', ),
        'mm3': ( 'uint', 'both', ),
        'mm4': ( 'uint', 'both', ),
        'mm5': ( 'uint', 'both', ),
        'mm6': ( 'uint', 'both', ),
        'mm7': ( 'uint', 'both', ),
        # SSE registers.
        'xmm0': ( 'uint', 'both', ),
        'xmm1': ( 'uint', 'both', ),
        'xmm2': ( 'uint', 'both', ),
        'xmm3': ( 'uint', 'both', ),
        'xmm4': ( 'uint', 'both', ),
        'xmm5': ( 'uint', 'both', ),
        'xmm6': ( 'uint', 'both', ),
        'xmm7': ( 'uint', 'both', ),
        'xmm8': ( 'uint', 'both', ),
        'xmm9': ( 'uint', 'both', ),
        'xmm10': ( 'uint', 'both', ),
        'xmm11': ( 'uint', 'both', ),
        'xmm12': ( 'uint', 'both', ),
        'xmm13': ( 'uint', 'both', ),
        'xmm14': ( 'uint', 'both', ),
        'xmm15': ( 'uint', 'both', ),
        'xmm0.lo': ( 'uint', 'both', ),
        'xmm1.lo': ( 'uint', 'both', ),
        'xmm2.lo': ( 'uint', 'both', ),
        'xmm3.lo': ( 'uint', 'both', ),
        'xmm4.lo': ( 'uint', 'both', ),
        'xmm5.lo': ( 'uint', 'both', ),
        'xmm6.lo': ( 'uint', 'both', ),
        'xmm7.lo': ( 'uint', 'both', ),
        'xmm8.lo': ( 'uint', 'both', ),
        'xmm9.lo': ( 'uint', 'both', ),
        'xmm10.lo': ( 'uint', 'both', ),
        'xmm11.lo': ( 'uint', 'both', ),
        'xmm12.lo': ( 'uint', 'both', ),
        'xmm13.lo': ( 'uint', 'both', ),
        'xmm14.lo': ( 'uint', 'both', ),
        'xmm15.lo': ( 'uint', 'both', ),
        'xmm0.hi': ( 'uint', 'both', ),
        'xmm1.hi': ( 'uint', 'both', ),
        'xmm2.hi': ( 'uint', 'both', ),
        'xmm3.hi': ( 'uint', 'both', ),
        'xmm4.hi': ( 'uint', 'both', ),
        'xmm5.hi': ( 'uint', 'both', ),
        'xmm6.hi': ( 'uint', 'both', ),
        'xmm7.hi': ( 'uint', 'both', ),
        'xmm8.hi': ( 'uint', 'both', ),
        'xmm9.hi': ( 'uint', 'both', ),
        'xmm10.hi': ( 'uint', 'both', ),
        'xmm11.hi': ( 'uint', 'both', ),
        'xmm12.hi': ( 'uint', 'both', ),
        'xmm13.hi': ( 'uint', 'both', ),
        'xmm14.hi': ( 'uint', 'both', ),
        'xmm15.hi': ( 'uint', 'both', ),
        'xmm0.lo.zx': ( 'uint', 'both', ),
        'xmm1.lo.zx': ( 'uint', 'both', ),
        'xmm2.lo.zx': ( 'uint', 'both', ),
        'xmm3.lo.zx': ( 'uint', 'both', ),
        'xmm4.lo.zx': ( 'uint', 'both', ),
        'xmm5.lo.zx': ( 'uint', 'both', ),
        'xmm6.lo.zx': ( 'uint', 'both', ),
        'xmm7.lo.zx': ( 'uint', 'both', ),
        'xmm8.lo.zx': ( 'uint', 'both', ),
        'xmm9.lo.zx': ( 'uint', 'both', ),
        'xmm10.lo.zx': ( 'uint', 'both', ),
        'xmm11.lo.zx': ( 'uint', 'both', ),
        'xmm12.lo.zx': ( 'uint', 'both', ),
        'xmm13.lo.zx': ( 'uint', 'both', ),
        'xmm14.lo.zx': ( 'uint', 'both', ),
        'xmm15.lo.zx': ( 'uint', 'both', ),
        'xmm0.dw0': ( 'uint', 'both', ),
        'xmm1.dw0': ( 'uint', 'both', ),
        'xmm2.dw0': ( 'uint', 'both', ),
        'xmm3.dw0': ( 'uint', 'both', ),
        'xmm4.dw0': ( 'uint', 'both', ),
        'xmm5.dw0': ( 'uint', 'both', ),
        'xmm6.dw0': ( 'uint', 'both', ),
        'xmm7.dw0': ( 'uint', 'both', ),
        'xmm8.dw0': ( 'uint', 'both', ),
        'xmm9.dw0': ( 'uint', 'both', ),
        'xmm10.dw0': ( 'uint', 'both', ),
        'xmm11.dw0': ( 'uint', 'both', ),
        'xmm12.dw0': ( 'uint', 'both', ),
        'xmm13.dw0': ( 'uint', 'both', ),
        'xmm14.dw0': ( 'uint', 'both', ),
        'xmm15.dw0': ( 'uint', 'both', ),  # Was misspelled 'xmm15_dw0', unlike all the other xmmN.dw0 fields.
        # AVX registers.
        'ymm0': ( 'uint', 'both', ),
        'ymm1': ( 'uint', 'both', ),
        'ymm2': ( 'uint', 'both', ),
        'ymm3': ( 'uint', 'both', ),
        'ymm4': ( 'uint', 'both', ),
        'ymm5': ( 'uint', 'both', ),
        'ymm6': ( 'uint', 'both', ),
        'ymm7': ( 'uint', 'both', ),
        'ymm8': ( 'uint', 'both', ),
        'ymm9': ( 'uint', 'both', ),
        'ymm10': ( 'uint', 'both', ),
        'ymm11': ( 'uint', 'both', ),
        'ymm12': ( 'uint', 'both', ),
        'ymm13': ( 'uint', 'both', ),
        'ymm14': ( 'uint', 'both', ),
        'ymm15': ( 'uint', 'both', ),

        # Special ones.
        'value.xcpt': ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp one of kasOperators, while
        sValue and sType are the value string and the type name.  All four
        must be strings.  The checks are assertions only.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1307
1308
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (see kdVariables) against one of the
    values valid for that variable, using one of the operators in
    kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue a valid value for the variable (asserted).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1405
1406
1407class InstructionTest(object):
1408 """
1409 Instruction test.
1410 """
1411
1412 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1413 self.oInstr = oInstr # type: InstructionTest
1414 self.aoInputs = [] # type: List[TestInOut]
1415 self.aoOutputs = [] # type: List[TestInOut]
1416 self.aoSelectors = [] # type: List[TestSelector]
1417
1418 def toString(self, fRepr = False):
1419 """
1420 Converts it to string representation.
1421 """
1422 asWords = [];
1423 if self.aoSelectors:
1424 for oSelector in self.aoSelectors:
1425 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1426 asWords.append('/');
1427
1428 for oModifier in self.aoInputs:
1429 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1430
1431 asWords.append('->');
1432
1433 for oModifier in self.aoOutputs:
1434 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1435
1436 if fRepr:
1437 return '<' + ' '.join(asWords) + '>';
1438 return ' '.join(asWords);
1439
1440 def __str__(self):
1441 """ Provide string represenation. """
1442 return self.toString(False);
1443
1444 def __repr__(self):
1445 """ Provide unambigious string representation. """
1446 return self.toString(True);
1447
class Operand(object):
    """
    Instruction operand.

    Consists of a location (a key of g_kdOpLocations) and a type (a key of
    g_kdOpTypes).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst in ['E', 'G', 'M'];
1462
1463
1464
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core description
    (mnemonic, operands, encoding, flags, tests, ...), implementation
    details, where it was found in the sources and some intermediate input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks     = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items, leaving
        out fields that are None as well as the optional ones that are
        unset.  With fRepr set the result is enclosed in '<>'.
        """
        aasFields = [ ['opcode', self.sOpcode], ];
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic', self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding', self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum', self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group', self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.extend([
            ['invlstyle', self.sInvalidStyle],
            ['fltest',    self.asFlTest],
            ['flmodify',  self.asFlModify],
            ['flundef',   self.asFlUndefined],
            ['flset',     self.asFlSet],
            ['flclear',   self.asFlClear],
            ['mincpu',    self.sMinCpu],
            ['stats',     self.sStats],
            ['sFunction', self.sFunction],
        ]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The oMap, sOpcode, sSubOpcode and sPrefix arguments replace the
        corresponding attribute in the copy when given.  The copy gets this
        instruction as parent.  List and dictionary attributes are shallow
        copies, everything else is shared.  The MC blocks are not carried
        over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent       = self;

        # The placement attributes the caller may override.
        oCopy.aoMaps        = list(self.aoMaps) if not oMap else [oMap,];
        oCopy.sPrefix       = sPrefix    or self.sPrefix;
        oCopy.sOpcode       = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode    = sSubOpcode or self.sSubOpcode;

        # Attributes that are taken over as they are.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Containers that get a shallow copy.  (Deeper copy of aoOperands and aoTests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints        = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms are the full hex byte ('0xNN'), '/r', '11/r' and
        '!11/r' where r is a single digit.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The remaining forms all end with a digit that goes into bits 5:3.
        cchOpcode = len(sOpcode);
        if cchOpcode > 0 and sOpcode[-1].isdigit():
            # The /r form:
            if cchOpcode == 2 and sOpcode[0] == '/':
                return int(sOpcode[1:]) << 3;

            # The 11/r form:
            if cchOpcode == 4 and sOpcode.startswith('11/'):
                return (int(sOpcode[-1:]) << 3) | 0xc0;

            # The !11/r form (ORs in 0x80, i.e. the top two bits are 10b):
            ## @todo this doesn't really work...
            if cchOpcode == 5 and sOpcode.startswith('!11/'):
                return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        uRet = 0;
        for sFlag in asFlags or []:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style ('0' if None or empty).
        """
        if not asFlags:
            return '0';
        asRet = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asRet.append(sConstant);
        return ' | '.join(asRet);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1740
1741
1742
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each function name maps to a list of instructions.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1754
## Instruction maps.
## The one byte map comes first, followed by its groups, the two and three
## byte maps with their groups, the VEX maps, 3DNow! and finally the XOP maps.
g_aoInstructionMaps = [
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (InstructionMap.sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps indexed by InstructionMap.sIemName.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1820
1821
1822#
1823# Decoder functions.
1824#
1825
class DecoderFunction(object):
    """
    A decoder function.

    Mainly used for scoping searches for variables used in microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine = -1;
        ## The function name.
        self.sName = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.
        May only be called once per function.
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1848
1849
1850#
1851# "Microcode" statements and blocks
1852#
1853
class McStmt(object):
    """
    A single statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        self.sName = sName;         ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters (strings).
        self.oUser = None;          ##< Initially None; not used by this class.

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as a single line of code, indented by cchIndent
        spaces and terminated by a semicolon and a newline.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n'));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, returning the concatenated code.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having one of the given names,
        descending into the branches of conditional statements.  Returns None
        if there is no such statement.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            # Search the if-branch first, then the else-branch.
            oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                    or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
            if oHit:
                return oHit;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches the given list of statements (including the branches of
        conditional statements) for the names in the dNames dictionary, adding
        each one found to dRet with an occurrence count.

        Returns the total number of hits.
        """
        cHits = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cHits += 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cHits += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cHits;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. the name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1916
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The branches are copied, so the caller's lists are never shared.
        self.aoIfBranch = list(aoIfBranch) if aoIfBranch is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];
        self.oIfBranchAnnotation = None;    ##< User specific IF-branch annotation.
        self.oElseBranchAnnotation = None;  ##< User specific ELSE-branch annotation.
        ## Infix for the ELSE/ENDIF macro names: '_NATIVE' for IEM_MC_NATIVE_IF, otherwise empty.
        self.sNativeInfix = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented by four more spaces.
        The else part is left out when the else-branch is empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asParts);
1937
class McStmtNativeIf(McStmtCond):
    """ IEM_MC_NATIVE_IF """
    def __init__(self, sName, asArchitectures):
        # The single parameter is the architectures joined by '|', or '0' if there are none.
        sParam = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sParam,]);
        self.asArchitectures = asArchitectures;
1943
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type.
        self.sType = sType;
        ## The variable name.
        self.sVarName = sVarName;
        ## The assigned / const value, None if there is none.
        self.sValue = sValue;
1951
class McStmtArg(McStmtVar):
    """ IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg = iArg;
        ## The reference string (local variable, register).
        self.sRef = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1960
class McStmtCall(McStmt):
    """ IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index of the function name in asParams.
        self.idxFn = iFnParam;
        ## Index of the parameter following the function name.
        self.idxParams = iFnParam + 1;
        ## The function name.
        self.sFn = asParams[iFnParam];
        ## The asParams entry at iRcNameParam, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1969
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The parameters are the tested and the modified flags of the given
    instruction as C-style constant expressions.
    """
    def __init__(self, oInstruction):
        sTested = oInstruction.getTestedFlagsCStyle();
        sModified = oInstruction.getModifiedFlagsCStyle();
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', [sTested, sModified,]);
1977
1978
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the annotation: '// C++ decode' if True, '// C++ normal' if False.
        self.fDecode = fDecode;
        ## Extra indentation added to what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with a trailing '// C++ decode' or '// C++ normal'
        annotation in front of each newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1996
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'function(arg, ...);' followed by the
        '// C++ decode' or '// C++ normal' annotation.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sIndent = ' ' * (cchIndent + self.cchIndent);
        sArgs = ', '.join(self.asParams[1:]);
        return sIndent + self.asParams[0] + '(' + sArgs + ');' + sAnnotation;
2016
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the annotation: '// C++ decode' if True, '// C++ normal' if False.
        self.fDecode = fDecode;
        ## Extra indentation added to what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if-statement with braces on separate lines and the branches
        indented by four more spaces.  The else part is left out when the
        else-branch is empty.
        """
        cchOuter = cchIndent + self.cchIndent;
        sIndent = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchOuter + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
2039
class McCppPreProc(McCppGeneric):
    """
    C++/C preprocessor directive.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive without any indentation or annotation (cchIndent is ignored). """
        sDirective = self.asParams[0];
        return sDirective + '\n';
2049
2050
## IEM_MC_F_XXX values.
## Each flag maps to a tuple of further flags which McBlock.parseMcBegin also
## sets in dsMcFlags when it encounters the flag.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
## Each flag maps to a tuple of further flags which McBlock.parseCImplFlags
## also sets in dsCImplFlags when it encounters the flag.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2094class McBlock(object):
2095 """
2096 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2097 """
2098
2099 ## @name Macro expansion types.
2100 ## @{
2101 kiMacroExp_None = 0;
2102 kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
2103 kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
2104 ## @}
2105
def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
             oInstruction = None, cchIndent = None, fDeferToCImpl = False):
    """
    Initializes the block from its start position; the end position and the
    raw lines are filled in later by complete().
    """
    #
    # Where the block is.
    #
    ## The source file containing the block.
    self.sSrcFile = sSrcFile;
    ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
    self.fDeferToCImpl = fDeferToCImpl;
    ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
    self.iBeginLine = iBeginLine;
    ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
    self.offBeginLine = offBeginLine;
    ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
    ## This is -1 until complete() has been called.
    self.iEndLine = -1;
    ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
    self.offEndLine = 0;
    ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
    self.offAfterEnd = 0;

    #
    # What the block belongs to.
    #
    ## The function the block resides in.
    self.oFunction = oFunction;
    ## The name of the function the block resides in. DEPRECATED.
    self.sFunction = oFunction.sName;
    ## The block number within the function.
    self.iInFunction = iInFunction;
    ## The instruction this block is associated with - can be None.
    self.oInstruction = oInstruction # type: Instruction

    #
    # The block content.
    #
    ## Indentation level of the block.
    ## Note! Falls back on offBeginLine when cchIndent is None or zero.
    self.cchIndent = cchIndent or offBeginLine;
    ## The raw lines the block is made up of.
    self.asLines = [] # type: List[str]
    ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
    ## kiMacroExp_Entire, kiMacroExp_Partial).
    self.iMacroExp = self.kiMacroExp_None;
    ## IEM_MC_BEGIN: Argument count, -1 if not yet set.
    self.cArgs = -1;
    ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoArgs = [] # type: List[McStmtArg]
    ## IEM_MC_BEGIN: Locals count, -1 if not yet set.
    self.cLocals = -1;
    ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoLocals = [] # type: List[McStmtVar]
    ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
    self.dsMcFlags = {} # type: Dict[str, bool]
    ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
    self.dsCImplFlags = {} # type: Dict[str, bool]
    ## Decoded statements in the block.
    self.aoStmts = [] # type: List[McStmt]
2151
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording the end position and the raw
    lines.  May only be called once per block.
    """
    assert self.iEndLine == -1;
    (self.iEndLine, self.offEndLine) = (iEndLine, offEndLine);
    (self.offAfterEnd, self.asLines) = (offAfterEnd, asLines);
2161
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is iBeginLine plus the number of newlines preceding
    off, and the reported column is one-based.
    """
    cPrecedingLines = sRawCode.count('\n', 0, off);
    # rfind gives -1 when off is on the first line, which makes the column off + 1.
    iColumn = off - sRawCode.rfind('\n', 0, off);
    sLocation = '%s:%d:%d' % (self.sSrcFile, self.iBeginLine + cPrecedingLines, iColumn,);
    raise ParserException('%s: parsing error: %s' % (sLocation, sMessage,));
2168
def raiseStmtError(self, sName, sMessage):
    """ Raises a statement parser error (ParserException) located at the first line of the block. """
    sError = '%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,);
    raise ParserException(sError);
2172
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks the parameter count of the sName statement.

    Returns True if it matches, raises ParserException if it doesn't.
    """
    cParamsFound = len(asParams);
    if cParamsFound == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParamsFound,));
2179
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """ Generic parser returning a plain McStmt object; oSelf is not used. """
    _ = oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2185
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """ Generic parser returning a plain McStmtCond object; oSelf is not used. """
    _ = oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2191
## The architecture names parseMcNativeIf accepts in IEM_MC_NATIVE_IF.
## Only the keys are checked there.
kdArchVals = {
    'RT_ARCH_VAL_X86': True,
    'RT_ARCH_VAL_AMD64': True,
    'RT_ARCH_VAL_ARM32': True,
    'RT_ARCH_VAL_ARM64': True,
    'RT_ARCH_VAL_SPARC32': True,
    'RT_ARCH_VAL_SPARC64': True,
};
2200
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a '|' separated
    list of architectures, each of which must be in kdArchVals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sArchExpr = asParams[0];
    asArchitectures = [];
    if sArchExpr.strip() != '0':
        asArchitectures = [sArch.strip() for sArch in sArchExpr.split('|')];
    for sArch in asArchitectures:
        if sArch not in oSelf.kdArchVals:
            oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
    return McStmtNativeIf(sName, asArchitectures);
2213
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The parameters are: argument count, locals count, IEM_MC_F_XXX flags and
    IEM_CIMPL_F_XXX flags.  The counts and flags are recorded in oSelf.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    fFirstUse = oSelf.cArgs == -1 and oSelf.cLocals == -1 and not oSelf.dsMcFlags;
    if not fFirstUse:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    oSelf.cArgs = int(asParams[0]);
    oSelf.cLocals = int(asParams[1]);

    # The IEM_MC_F_XXX flags, including the ones they imply.
    sMcFlags = asParams[2];
    if sMcFlags != '0':
        for sFlag in sMcFlags.split('|'):
            sFlag = sFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            for sImpliedFlag in g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sImpliedFlag] = True;

    # The IEM_CIMPL_F_XXX flags.
    sCImplFlags = asParams[3];
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2236
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """ IEM_MC_ARG - parameters: type, name, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = (asParams[0], asParams[1], asParams[2]);
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2244
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sValue, sArgNo) = (asParams[0], asParams[1], asParams[2], asParams[3]);
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2252
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocal, sArgNo) = (asParams[0], asParams[1], asParams[2], asParams[3]);
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocal, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2260
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

    Note! This one is split up into an IEM_MC_LOCAL and an IEM_MC_ARG_LOCAL_REF
          statement, so a tuple with both is returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = (asParams[0], asParams[1], asParams[2]);

    # The uint32_t local holding the flags.
    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    # The argument referencing that local.
    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);
    return (oStmtLocal, oStmtArg,);
2272
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS - takes no parameters.

    Note! Translated to an IEM_MC_ARG_CONST statement for argument 0.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    sType = 'PX86XSAVEAREA';
    sVarName = 'pXState';
    sValue = '&pVCpu->cpum.GstCtx.XState';
    oStmt = McStmtArg('IEM_MC_ARG_CONST', [sType, sVarName, sValue, '0'], sType, sVarName, 0, sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2282
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """ IEM_MC_LOCAL - parameters: type, name. """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = (asParams[0], asParams[1]);
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2290
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_ASSIGN - parameters: type, name, assigned value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssigned) = (asParams[0], asParams[1], asParams[2]);
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssigned);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2298
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConst) = (asParams[0], asParams[1], asParams[2]);
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sConst);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2306
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_EFLAGS - one parameter: the name of the uint32_t local. """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2314
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The statement name ends with the argument count.  Parameter 0 is the
    return value name and parameter 1 the function.
    """
    cParamsExpected = 2 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 1, iRcNameParam = 0);
2321
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The statement name ends with the argument count.  Parameter 0 is the function.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2328
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The statement name ends with the argument count.  Parameter 0 is the function.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2335
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The statement name ends with the argument count.  Parameter 0 is the function.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2342
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The statement name ends with the argument count.  Parameter 0 is the function.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2349
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The statement name ends with the argument count.  Parameter 0 is the function.
    """
    cParamsExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2356
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl that
    validates a '|' separated list of IEM_CIMPL_F_XXX values and merges them
    into dsCImplFlags, together with the flags g_kdCImplFlags lists for each.

    Comments are stripped first.  A leading '(' and a trailing ')' on the
    individual flags are dropped, and '0' entries are ignored.

    Returns None.
    Raises a statement error (see raiseStmtError) on unknown flags; this
    includes empty entries, e.g. caused by a stray '|'.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Use startswith/endswith rather than indexing here, so that an empty
            # entry is reported as an unknown flag below instead of triggering an
            # IndexError.
            if sFlag.startswith('('): sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):   sFlag = sFlag[:-1].strip();
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2378
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The statement name ends with the argument count.  Parameter 0 holds the
    IEM_CIMPL_F_XXX flags and parameter 2 the function.
    """
    cParamsExpected = 3 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    oSelf.parseCImplFlags(sName, asParams[0]);
    return McStmtCall(sName, asParams, iFnParam = 2);
2386
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The argument count is the digit in front of the '_RET' suffix.  Parameter 0
    holds the IEM_CIMPL_F_XXX flags and parameter 2 the function.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    cParamsExpected = 3 + int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    oSelf.parseCImplFlags(sName, asParams[0]);
    return McStmtCall(sName, asParams, iFnParam = 2);
2396
@staticmethod
def stripComments(sCode):
    """
    Returns sCode with C and C++ comments removed.

    An unterminated comment truncates the code at the comment start (trailing
    spaces are stripped as well).  A removed C comment is replaced by a single
    space, unless there already is a space on either side of it.

    Note! Does not take string literals into account.
    """
    offSlash = sCode.find('/');
    while 0 <= offSlash < len(sCode) - 1:
        chNext = sCode[offSlash + 1];
        if chNext == '/':
            # C++ comment: Remove up to the newline, keeping and skipping the latter.
            offNewline = sCode.find('\n', offSlash + 2);
            if offNewline < 0:
                return sCode[:offSlash].rstrip();
            sCode = sCode[:offSlash] + sCode[offNewline:];
            offNext = offSlash + 1;

        elif chNext == '*':
            # C comment: Remove it, making sure the surrounding code stays separated.
            offClose = sCode.find('*/', offSlash + 2);
            if offClose < 0:
                return sCode[:offSlash].rstrip();
            offAfter = offClose + 2;
            fSpaceBefore = offSlash > 0 and sCode[offSlash - 1].isspace();
            fSpaceAfter = offAfter < len(sCode) and sCode[offAfter].isspace();
            sSep = '' if fSpaceBefore or fSpaceAfter else ' ';
            sCode = sCode[:offSlash] + sSep + sCode[offAfter:];
            offNext = offSlash + len(sSep);

        else:
            # Not a comment, just a slash.
            offNext = offSlash + 1;

        offSlash = sCode.find('/', offNext);
    return sCode;
2429
@staticmethod
def extractParam(sCode, offParam):
    """
    Extracts the parameter value starting at offParam in sCode.

    The parameter ends at the first ',' or ')' outside nested parentheses,
    or at the end of sCode.

    Returns the stripped value and the offset of the terminating ',' or ')'
    (len(sCode) if there is no terminator).
    """
    cNesting = 0;
    offCur = offParam;
    cchCode = len(sCode);
    while offCur < cchCode:
        ch = sCode[offCur];
        if cNesting == 0 and ch in (',', ')'):
            break;
        if ch == '(':
            cNesting += 1;
        elif ch == ')':
            cNesting -= 1;
        offCur += 1;
    return (sCode[offParam : offCur].strip(), offCur);
2451
@staticmethod
def extractParams(sCode, offOpenParen):
    """
    Parses the parameter list starting with the '(' at offOpenParen in sCode.

    Returns the list of (stripped) parameter values and the offset of the
    closing parenthesis.
    Returns (None, len(sCode)) if no closing parenthesis was found.
    """
    assert sCode[offOpenParen] == '(';
    asParams = [];
    off = offOpenParen + 1;
    while off < len(sCode):
        ch = sCode[off];
        if ch.isspace():
            off += 1;
        elif ch != ')':
            (sParam, off) = McBlock.extractParam(sCode, off);
            if off >= len(sCode):
                # The parameter ran to the end of the code, so there is no closing
                # parenthesis.  Return the documented failure value rather than
                # asserting (or hitting an IndexError when asserts are disabled).
                return (None, off);
            asParams.append(sParam);
            if sCode[off] == ',':
                off += 1;
        else:
            return (asParams, off);
    return (None, off);
2475
@staticmethod
def findClosingBraces(sCode, off, offStop):
    """
    Finds the '}' matching the '{' at off in sCode, not looking at or beyond
    offStop.

    Returns the offset of the matching '}' on success, otherwise -1.

    Note! Does not take comments into account.
    """
    cOpen = 1;
    offScan = off + 1;
    while offScan < offStop:
        offBrace = sCode.find('}', offScan, offStop);
        if offBrace < 0:
            return -1;
        # Account for any braces opened before this closing one, then close one.
        cOpen += sCode.count('{', offScan, offBrace) - 1;
        if cOpen == 0:
            return offBrace;
        offScan = offBrace + 1;
    return -1;
2496
@staticmethod
def countSpacesAt(sCode, off, offStop):
    """ Returns the number of consecutive whitespace characters at off in sCode, not looking at or beyond offStop. """
    cSpaces = 0;
    for offCur in range(off, offStop):
        if not sCode[offCur].isspace():
            break;
        cSpaces += 1;
    return cSpaces;
2504
@staticmethod
def skipSpacesAt(sCode, off, offStop):
    """ Returns the first offset at or after off of a non-whitespace character, offStop at the most. """
    cchSpaces = McBlock.countSpacesAt(sCode, off, offStop);
    return off + cchSpaces;
2509
@staticmethod
def isSubstrAt(sStr, off, sSubStr):
    """ Returns True if sSubStr is found at off in sStr, otherwise False. """
    offEnd = off + len(sSubStr);
    return sStr[off : offEnd] == sSubStr;
2514
## Matches the start of a C/C++ control statement: 'if (', 'else', 'while (', 'for (' or 'do'.
koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches 'iem.s.' followed by one of the member names listed in the pattern.
koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                               + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                               + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                               + r')');

## The conditional statement families.
## Each entry is: ( if-statement name prefix, else statement name, endif statement name ).
kaasConditions = (
    ( 'IEM_MC_IF_', 'IEM_MC_ELSE', 'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop].

    Three kinds of statements are recognized:
        - IEM_MC_XXX statements, handed to the parser registered for the
          name in g_dMcStmtParsers.
        - C preprocessor lines (McCppPreProc).
        - Other C/C++ statements (McCppGeneric, or McCppCond for if/else).

    The branches of conditional statements are decoded by recursive calls
    using iLevel + 1.  A negative offStop means the end of sRawCode.

    Returns list of McStmt instances.
    Raises ParserException on failure.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            # Guard the lookahead so a '/' ending the input is reported as a
            # decode error instead of causing an IndexError.
            chNext = sRawCode[off + 1] if off + 1 < len(sRawCode) else '';
            if chNext == '/': # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break;
                off += 1;
            elif chNext == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break;
                off += 2;
            else:
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
            # Extract it and strip comments from it.
            if   self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else:                                                           iCond = -1;
            if iCond < 0:
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # Back up over the whitespace between the statement and the '{'.  offEnd is
                # used as an exclusive end offset below and as the start for locating the '{'.
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler (first element of the table entry).
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0];
            oStmt = fnParser(self, sName, asParams);
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            #   IEM_MC_IF_XXX() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ELSE() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement.  if and else requires special handling.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                # Preprocessor line: it ends at the newline (or at offStop).
                # Leave offEnd on the last non-space character of the line; it
                # is used as an inclusive end offset below.  Testing the
                # character at offEnd itself (rather than the one before it)
                # keeps a single character final token such as the '1' in
                # '#if 1' and strips all trailing whitespace.
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd].isspace():
                    offEnd -= 1;
            else:
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding.  This is a statement filter
            # criteria when generating the threaded functions blocks.
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                          or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                          or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                          or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                          or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                          or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                          );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1;

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1;
                else:
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1;
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2768
def decode(self):
    """
    Returns the decoded statement list of the block.

    The statements are decoded from the joined self.asLines and stored in
    self.aoStmts the first time around; a non-empty self.aoStmts is
    returned as-is.
    Raises ParserException on failure.
    """
    if self.aoStmts:
        return self.aoStmts;
    self.aoStmts = self.decodeCode(''.join(self.asLines));
    return self.aoStmts;
2778
2779
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that iEffSeg isn't referenced ahead of the effective address
    calculation.
    Returns None if all is fine, otherwise an error string.

    See r158454 for an example of this issue.
    """
    sNeedle = 'pVCpu->iem.s.iEffSeg';

    # Only the first IEM_MC_CALC_RM_EFF_ADDR statement at this level counts.
    # Conditional branches are not descended into, as we're ASSUMING the
    # references will not occur there before the address calculation.
    iCalc = next((idx for idx, oCur in enumerate(aoStmts) if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), -1);

    # Walk backwards from it looking for a parameter mentioning iEffSeg.
    for iPrev in range(iCalc - 1, -1, -1):
        if any(sNeedle in sArg for sArg in aoStmts[iPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iPrev + 1,);
    return None;
2801
# Captures the first word of a C/C++ statement; the word must be followed by
# a space, '(' or ';'.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C/C++ statements that checkForDoneDecoding()
# tolerates after the IEMOP_HLP_DONE_* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(( 'IEMOP_HLP_IN_VMX_OPERATION',
                                               'IEMOP_HLP_VMX_INSTR', ), True);
2807
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block has exactly one IEMOP_HLP_DONE_*DECODING* macro
    invocation and that the statements are sensibly placed around it.
    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top level statements are examined.
    cDone = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        if not oCur.isCppStmt():
            #
            # MC statement.
            #
            sName = oCur.sName;
            if sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and idxStmt == 0: # implicit
                cDone += 1;
            elif cDone == 0:
                # The 2nd table column flags state modifying statements.
                if g_dMcStmtParsers.get(sName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            elif sName in ('IEM_MC_CALC_RM_EFF_ADDR',):
                return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            continue;

        #
        # C/C++ statement, classified by its first word (if any).
        #
        oMatch = self.koReCppFirstWord.match(oCur.asParams[0]);
        if not oMatch:
            continue;
        sWord = oMatch.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_',)):
            cDone += 1;
        elif cDone > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);

    if cDone == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDone > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2849
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.
    Returns None on success, error string on failure.

    The asRegRefClasses parameter is a dictionary which is updated with the
    register classes referenced so far (class name -> True).  It is shared
    with the recursive calls checking conditional branches.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                # A reference: record the register class following '_REF_'.
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # No further underscore: the class is the rest of the
                        # name (e.g. 'EFLAGS' for IEM_MC_REF_EFLAGS), not just
                        # its first character.
                        sClass = oStmt.sName[offRef + 5 :];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    # A fetch: memory fetches are ignored, register fetches
                    # must not be from a class already referenced.
                    sClass = oStmt.sName[offFetch + 7 : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2905
def check(self):
    """
    Runs the sanity checks on the decoded block.
    Returns a list with the error strings of the failed checks, empty if
    all is fine.
    """
    aoStmts = self.decode();

    # Each check returns None (or an empty string) when it passes.
    asResults = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    return [sError for sError in asResults if sError];
2927
2928
## Temporary flag for enabling / disabling experimental MCs depending on the
## SIMD register allocator.
## The g_dMcStmtParsers table below uses it as the native recompiler support
## column for a number of its SIMD related entries.
g_fNativeSimd = True;
2932
2933## IEM_MC_XXX -> parser + info dictionary.
2934#
2935# The info columns:
2936# - col 1+0: boolean entry indicating whether the statement modifies state and
2937 # must not be used before IEMOP_HLP_DONE_*.
2938# - col 1+1: boolean entry indicating similar to the previous column but is
2939# used to decide when to emit calls for conditional jumps (Jmp/NoJmp).
2940# The difference is that most IEM_MC_IF_XXX entries are False here.
2941# - col 1+2: boolean entry indicating native recompiler support.
2942#
2943# The raw table was generated via the following command
2944# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2945# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2946# pylint: disable=line-too-long
2947g_dMcStmtParsers = {
2948 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2949 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2950 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2951 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2952 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2953 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2954 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2955 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2956 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2957 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2958 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2959 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2960 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2961 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2962 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2963 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2964 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2965 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2966 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2967 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2968 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2969 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2970 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2971 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2972 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2973 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2974 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2975 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2976 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2977 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2978 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2979 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2980 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2981 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2982 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2983 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2984 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2985 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2986 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2987 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2988 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2989 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2990 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2991 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2992 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2993 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2994 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2995 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2996 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2997 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2998 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2999 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
3000 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
3001 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
3002 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
3003 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
3004 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
3005 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
3006 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3007 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3008 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3009 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
3010 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
3011 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
3012 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
3013 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3014 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3015 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3016 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3017 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3018 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3019 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
3020 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3021 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3022 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3023 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
3024 'IEM_MC_COMMIT_EFLAGS_OPT': (McBlock.parseMcGeneric, True, True, True, ),
3025 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3026 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3027 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3028 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3029 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3030 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3031 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3032 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3033 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3034 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3035 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3036 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3037 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3038 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3039 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3040 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3041 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3042 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3043 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3044 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3045 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3046 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3047 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3048 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3049 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3050 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3051 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3052 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3053 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3054 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3055 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3056 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3057 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3058 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3059 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3060 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3061 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3062 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3063 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3064 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3065 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3066 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3067 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3068 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3069 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3070 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3071 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3072 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3073 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3074 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3075 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3076 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3077 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3079 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3080 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3081 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3082 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3083 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3084 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3085 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3086 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3087 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3088 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3089 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3090 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3091 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3092 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3093 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3094 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3095 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3096 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3097 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3098 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3099 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3100 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3101 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3102 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3103 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3104 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_FETCH_MREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3106 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3107 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3108 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3109 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3110 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3111 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3112 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3113 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3114 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3115 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3116 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3117 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3118 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3119 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3120 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3121 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3122 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3123 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3124 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3125 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3126 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3127 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3128 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3129 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3130 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3131 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3132 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3133 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3134 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3135 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3136 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3137 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3138 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3139 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3140 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3141 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3142 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3143 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3144 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3145 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3146 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3147 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3148 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3149 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3150 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3151 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3152 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3153 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3154 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3155 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3156 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3157 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3158 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3159 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3160 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3161 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3162 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3163 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3164 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, True, ),
3165 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, g_fNativeSimd),
3166 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3167 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3168 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3169 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3170 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3171 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3172 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3173 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3174 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3175 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3176 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3177 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3178 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3179 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3181 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3183 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3184 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3185 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3186 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3188 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3193 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3196 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3197 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3198 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3199 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3200 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3201 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3202 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3203 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3204 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3205 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3206 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3214 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3215 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3216 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3217 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3218 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3219 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3220 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3221 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3222 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3223 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3224 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3225 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3226 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3227 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3228 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3229 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3230 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3231 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3232 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3233 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3234 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3235 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3236 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3237 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3238 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3239 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3240 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3241 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3242 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3243 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3244 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3245 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3246 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3247 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3248 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3249 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3250 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3251 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3252 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3253 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3254 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3255 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3256 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3257 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3258 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3259 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3260 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3261 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, True, ),
3262 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3263 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3264 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3265 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3266 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3267 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3268 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3269 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3270 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3271 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3272 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3273 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3274 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3275 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3276 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3277 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3278 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3279 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3280 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3281 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3282 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3283 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3284 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3285 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3286 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3287 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3288 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3289 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3290 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3291 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3292 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3293 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3294 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3295 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3296 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3297 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3298 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3299 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3300 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3301 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3302 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3303 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3304 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3306 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3307 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3308 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3309 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3311 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3312 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3313 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3314 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3315 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3316 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3317 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3318 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3319 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3320 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3321 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3322 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3323 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3324 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3325 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3326 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3327 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3328 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3331 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3332 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3334 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3335 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3336 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3337 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3338 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3339 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3340 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3341 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3343 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3344 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3345 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3346 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3347 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3348 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3349 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3350 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3351 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3352 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3353 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3354 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3355 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3356 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3357 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3358 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3359 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3360 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3361 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3362 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3363 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3364 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3365 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3366 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3367 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3368 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3369 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3370 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3371 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3372 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3373 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3374 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3375 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3376 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3377 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3378 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3379 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3380 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3381 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3382 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3383};
3384# pylint: enable=line-too-long
3385
## List of microcode blocks.
# Module-level list holding McBlock instances; it is created empty here.
# NOTE(review): presumably appended to by the parser for every microcode block
# it finds - the code doing so is not in this part of the file, confirm there.
g_aoMcBlocks = [] # type: List[McBlock]
3388
3389
3390
class ParserException(Exception):
    """
    Parser exception.

    Plain Exception subclass carrying nothing but the message text it is
    constructed with.
    """
    def __init__(self, sMessage):
        # Hand the message to the base class so str() and .args behave as usual.
        super(ParserException, self).__init__(sMessage);
3395
3396
3397class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3398 """
3399 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3400 """
3401
3402 ## @name Parser state.
3403 ## @{
3404 kiCode = 0;
3405 kiCommentMulti = 1;
3406 ## @}
3407
class Macro(object):
    """
    A preprocessor macro definition and the means of expanding it.

    Simple macros (no parameter list) expand to their body verbatim, while
    macros with parameters get each parameter occurrence in the body replaced
    by the corresponding argument text, honouring the '##' pasting operator.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body (replacement text).
        self.iLine  = iLine;    ##< The line number given by the caller.
        ## Matches one parameter name in the body, together with an optional
        ## '##' (and surrounding blanks) on either side.  None when there are
        ## no parameters to substitute.
        self.oReArgMatch = None;
        if asArgs:
            sParamAlternatives = '|'.join(asArgs);
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + sParamAlternatives + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """
        This is just to make the expanded output a bit prettier.

        Returns True when ch is a whitespace character.
        NOTE(review): the '(' comparison cannot change the outcome since '(' is
                      not whitespace - possibly 'not ch.isspace()' was intended.
        """
        return ch != '(' and ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must have one entry per macro parameter, or
        be None/empty for a macro without parameters.  Returns the expanded
        body as a string.
        """
        _ = oParent;
        oReParam = self.oReArgMatch;

        # Nothing to substitute: the body is the expansion.
        if not oReParam:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dValueByParam = { self.asArgs[idxArg]: sArgText for idxArg, sArgText in enumerate(asArgs) };

        sExpanded = self.sBody;
        oHit      = oReParam.search(sExpanded);
        while oHit:
            offStart, offEnd = oHit.span();
            sReplacement     = dValueByParam[oHit.group(2)];

            # Padding is only considered on a side where no '##' was matched.
            # NOTE(review): the leading check looks at the first matched
            #               character rather than the one preceding the match.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sExpanded[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);

            sHead = sExpanded[ : offStart];
            sTail = sExpanded[offEnd : ];
            if fPadBefore:
                sHead += ' ';
            if fPadAfter:
                sTail = ' ' + sTail;
            sExpanded = sHead + sReplacement + sTail;

            # Resume after the inserted text so argument values are not re-expanded.
            oHit = oReParam.search(sExpanded, offStart + len(sReplacement));

        return sExpanded;
3449
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    The condition is validated when the object is constructed, and the object
    can afterwards tell whether a given build architecture is included in the
    block it guards.  The aoElif list and the fInElse flag start out empty and
    False respectively; they are only read here, so they are expected to be
    maintained by the code driving the parsing.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Build architecture name to the RT_ARCH_XXXX define selecting it.  Every
    # architecture define listed with -1 in kdKnownDefines has an entry here.
    kdBuildArchToIprt = {
        'x86':      'RT_ARCH_X86',
        'amd64':    'RT_ARCH_AMD64',
        'arm32':    'RT_ARCH_ARM32',
        'arm64':    'RT_ARCH_ARM64',
        'sparc32':  'RT_ARCH_SPARC',
        'sparc64':  'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).  Swallows surrounding blanks too.
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive name; 'if' and 'elif' get sExpr checked as an
        expression, for anything else sExpr is checked as a single define name.
        Raises Exception if the expression or define is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Flag (was initialised with an empty list, which is just as false).
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr);

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """
        Checks that sDefine is one that we support.

        Returns True for entries in kdKnownDefines and for VMM_INCLUDED_xxx_h
        style names.  Raises Exception if unsupported.
        """
        if sDefine in cls.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported.

        Accepts '0', '1' and defined(xxxx) terms combined using '||', '&&',
        '!' and parentheses, optionally ending with a literal '1'.  Returns
        True if fine, raises Exception if not.
        """
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx), or a literal '1' making up the rest of the expression.
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    off = len(sExpr);   # There is no match object to take the end offset from here.
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')'
                                    % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one term holds, False if none does.  Raises
        Exception for anything but a '||' chain of defined() terms.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):     # Must test at the current offset, not the string start.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so a trailing literal '1' is recognized like in checkExpression.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch has no entry in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not found in kdKnownDefines count as undefined.  Raises
        Exception for defines marked -2 there.
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        aoCppCondStack is the list of conditionals to check, every one of
        which must agree.  iLine is unused.  Returns True / False.
        """
        _ = iLine;
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The arch takes the primary block, so being in an #elif or #else part rules it out.
                if oCond.aoElif or oCond.fInElse:
                    return False;
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # Only fine if the matching #elif is the last one and no #else has been entered.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            return False;
                        fFine = True;
                # No condition matched, so only the #else part is left for this arch.
                if not fFine and not oCond.fInElse:
                    return False;
        return True;
3640
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used when formatting messages, asLines the
    lines to parse, sDefaultMap the name of the instruction map to fall
    back on (must be a key in g_dInstructionMaps) and sHostArch the host
    architecture string.  When oInheritMacrosFrom is given, a shallow copy
    of its macro dictionary and its macro regular expression are taken over.
    """
    # Input and position tracking.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock

    # Macros: either start empty or take over what another parser collected.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    else:
        self.dMacros    = {}    # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    self.aoCppCondStack = []    # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
    self.sHostArch      = sHostArch;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\(');     ##> Not DEFER_TO_CIMPL_0_RET!

    # Debug switches (general debug output is on by default).
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Tag name -> handler method.
    self.dTagHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        #@opfltest: Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        #@opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflclear and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        #@opflset: Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        #@opflclear: Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        #@opflundef: List of flag documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        #@opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags come in two spellings: @optestN and @optest[N], N = 0..47.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Collected error messages, see error() and printErrors().
    self.asErrors = [];
3732
def raiseError(self, sMessage):
    """
    Raises a ParserException carrying sMessage, prefixed with the source
    file name and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3738
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is the start of
    the current comment (self.iCommentLine) plus iLineInComment.
    """
    iReportLine  = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,);
    raise ParserException(sFullMessage);
3744
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Always returns False so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3752
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number iLine.

    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3760
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.

    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3768
def printErrors(self):
    """
    Writes all recorded errors to stderr in one go.

    Returns the number of recorded errors (nothing is written when zero).
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3777
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3784
def stripComments(self, sLine):
    """
    Returns sLine with every complete comment (as matched by
    self.oReComment) replaced by a single space.

    If a comment opener or closer is still present afterwards, i.e. the
    line holds part of a multi-line comment, an error is recorded and the
    text is returned as it is.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3795
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the table declaration line; the table body is read from
    self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the entry count
    matches, otherwise the (False) result of the error reporting method.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name and look up the corresponding map.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);
    else:
        cEntriesPerByte   = 4;
        cValidTableLength = 1024;
        asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    #
    # The next line should be '{' and nothing else.
    #
    fHaveBrace = self.iLine < len(self.asLines) and re.match('^ *{ *$', self.asLines[self.iLine]);
    if not fHaveBrace:
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Fetch the next line, dropping comments and surrounding spaces
        # (assumes no multi-line comments).
        sTableLine = self.asLines[self.iLine];
        self.iLine += 1;
        sTableLine = self.stripComments(sTableLine).strip();

        # Split the line up into entries.  An IEMOP_X4(x) entry stands for
        # four consecutive x entries.  Empty entries are dropped.
        asEntries = [];
        for sRawEntry in sTableLine.split(','):
            sEntry = sRawEntry.strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.extend([sEntry, sEntry, sEntry]);
            if sEntry:
                asEntries.append(sEntry);

        # Process the entries.
        for sEntry in asEntries:
            # End of table?
            if sEntry in ('};', '}'):
                if iEntry == cValidTableLength:
                    return True;
                return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));

            # Invalid opcode handlers are only counted.
            if not sEntry.startswith('iemOp_Invalid'):
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if not aoInstr:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
                else:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];

                    # Take the first instruction that already matches this
                    # slot or that can have missing opcode/prefix filled in.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            fUsable = True;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                            fUsable = True;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                            fUsable = True;
                        else:
                            fUsable = False;
                        if fUsable:
                            oInstr = oCurInstr;
                            break;

                    # None fits: clone the first one for this opcode/prefix.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3901
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global instruction
    list and the list of current instructions.

    iLine is the line the instruction is created on; defaults to the
    current line (self.iLine).  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3910
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    the first underscore separated word (lower cased) becomes the mnemonic
    and, if the instruction has no operands yet, the remaining words are
    turned into operands.

    Returns False on the first word that isn't a known operand type
    (operands added before that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) > 1 and not oInstr.aoOperands:
        for sOpType in asParts[1:]:
            if sOpType not in g_kdOpTypes:
                #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3926
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Fills in defaults for mnemonic, disassembler enum, statistics name,
    function name, maps and encoding that weren't given explicitly, and
    registers the instruction in the maps and the global stat/function
    dictionaries.  iLine is recorded as the completion line.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Neither tagged instructions (cOpTags > 0) nor untagged legacy
    #       ones get any special treatment here at present.

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing,
    # falling back on the function name without its 'iemOp_' part.
    if oInstr.sMnemonic is None:
        sNameSource = oInstr.sStats;
        if sNameSource is None and oInstr.sFunction is not None:
            sNameSource = oInstr.sFunction.replace('iemOp_', '');
        if sNameSource is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sNameSource);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # Derive the IEM function name from mnemonic and operand types.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            # No operands: ModR/M only for unused entries with a sub-opcode.
            if oInstr.fUnused and oInstr.sSubOpcode:
                asChoices = ('ModR/M', 'VEX.ModR/M');
            else:
                asChoices = ('fixed', 'VEX.fixed');
            oInstr.sEncoding = asChoices[1] if oInstr.onlyInVexMaps() else asChoices[0];
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
4032
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with current instruction(s).

    Completes every instruction in self.aoCurInstrs, updates the stub and
    instruction counters and resets the current comment and instruction
    list.  The completion line is the current line, or the comment start
    plus iLineInComment when that is given.

    With fEndOfFunction set, the current function (if any) is completed
    with the lines from its start to the current line and the function
    state is reset.

    Returns True.
    """
    if iLineInComment is None:
        iDoneLine = self.iLine;
    else:
        iDoneLine = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];

    if fEndOfFunction:
        #self.debug('%s: oCurFunction=None' % (self.iLine, ));
        oFunction = self.oCurFunction;
        if oFunction:
            oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
        self.oCurFunction   = None;
        self.iMcBlockInFunc = 0;
    return True;
4053
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, only attributes currently holding None are
    replaced (other values, including empty strings, are left alone).
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4065
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, padding the array with None as needed.

    Unless fOverwrite is True, only entries currently holding None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
4077
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hexadecimal value, everything after 'Opcode' is stored (space
    separated) in the sRawOldOpcodes attribute of the current instructions,
    with any upper case '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
4093
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (at comment start + iTagLine) if there is no
    current one, then bumps the tag count of every current instruction,
    counting each instruction as tagged the first time.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
4103
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section becomes one string made up of its stripped lines
    joined by single spaces; empty sections are skipped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
4115
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    Lines are stripped and joined by sLineSep within a section.  Every
    non-empty section except the one at index zero is preceded by
    sSectionSep, so there is no trailing separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return ''.join(asPieces);
4133
4134
4135
4136 ## @name Tag parsers
4137 ## @{
4138
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be non-empty, at most 180 characters (including the
    terminating dot, which is added when missing) and a single sentence.
    Returns True on success; on failure an error is recorded and the
    result of self.errorComment is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that is followed by a space (end of a sentence) or
    # that ends the text.  Anything following it means a second sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an earlier brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4168
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to the description sections of the instruction.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));

    _ = sTag; _ = iEndLine;
    return True;
4183
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success; on an invalid name or an attempt to replace
    an existing mnemonic an error is recorded and its result returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the identifier pattern.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to overwrite.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));
    oInstr.sMnemonic = sMnemonic;

    _ = iEndLine;
    return True;
4206
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations; when
    omitted, the default location of the type (second element of the
    g_kdOpTypes entry) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    Returns True on success; on failure an error is recorded and the
    result of self.errorComment is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;  # The operand number is the last character of the tag.
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4256
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and none of them may
    already be assigned to the instruction; either problem records an
    error and returns its result.  A name repeated within the value is
    reported as well, but does not stop processing.  Returns True otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMapNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMapNames:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMapName in asMapNames:
        if sMapName not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                               % (sTag, sMapName, ', '.join(g_dInstructionMaps.keys()),));

    # Throw errors on maps the instruction is already assigned to.
    for oExistingMap in oInstr.aoMaps:
        if oExistingMap.sName in asMapNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oExistingMap.sName));

    # Add the maps to the current list, complaining about repeats in the input.
    for sMapName in asMapNames:
        oNewMap = g_dInstructionMaps[sMapName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMapName));
        else:
            oInstr.aoMaps.append(oNewMap);

    _ = iEndLine;
    return True;
4291
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower cased; 'n/a' is stored as None and a two character
    value gets a '0x' prefix before validation.

    Returns True on success; on a missing, malformed or conflicting value
    an error is recorded and the result of self.errorComment is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # '!0xf3' is the one accepted value that isn't an opcode byte.
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4330
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Besides an opcode byte, the forms '/N', '11/N' and '!11/N' are
    accepted, where N is a single digit 0 thru 7 (the ModR/M reg field is
    three bits wide).

    Returns True on success; on an invalid value or an attempt to replace
    an existing opcode an error is recorded and its result returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4360
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the
    first element of the matching table entry.  Returns True on success,
    otherwise an error is recorded and its result returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sRawSubOpcode, ', '.join(sorted(g_kdSubOpcodes.keys())),));
    sSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    # Set it, refusing to overwrite.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
4390
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  Returns True on
    success, otherwise an error is recorded and its result returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnownName = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnownName and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, refusing to overwrite.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
4417
## EFlags tag to Instruction attribute name.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};

def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  The list is
    stored in the instruction attribute that kdOpFlagToAttr maps the tag to.

    All invalid names are reported before False is returned.  Replacing a
    non-empty list is an error too.  Returns True on success.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Tolerate surrounding whitespace.
            sStrippedFlag = sFlag.strip();
            if sStrippedFlag in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStrippedFlag;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Set them, refusing to overwrite a non-empty list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
4458
## EFLAGS class definitions with their attribute lists.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'incdec': {
        'asFlTest':         [],
        'asFlModify':       [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest':         [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify':       [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest':         [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear':        [],                                     # between IMUL and MUL.
        'asFlSet':          [],
        'asFlUndefined':    [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', 'of', ],
    },
    'bitmap': { # bt, btc, btr, bts
        'asFlTest':         [],
        'asFlModify':       [ 'cf', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest':         [],
        'asFlModify':       [],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
};

def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Shorthand that sets all the EFLAGS attribute lists of the instruction
    from the kdEFlagsClasses entry named by the value.

    Every attribute covered by the class must still be None; otherwise an
    error is recorded and nothing is modified.  The instruction receives
    its own copies of the lists, so changing them later does not affect
    the class table or other instructions.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s  (valid: %s)'
                                           % (sTag, sClass, ', '.join(sorted(self.kdEFlagsClasses.keys())),));

    # Check for conflicts up front so a failure leaves the instruction untouched.
    for sAttrib, asFlags in kdAttribs.items():
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));

    # Set the attributes (copies, the table lists are shared class data).
    for sAttrib, asFlags in kdAttribs.items():
        setattr(oInstr, sAttrib, list(asFlags));

    _ = iEndLine;
    return True;
4585
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The single word 'none' (any case) means an empty list.  Otherwise every
    value must be a key of g_kdHints; all offending values are reported thru
    errorComment() before the method gives up and returns False.  Valid hints
    are recorded as keys in oInstr.dHints; a hint that is already present is
    reported but does not fail the tag.

    Returns True unless a value was rejected.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry; the string itself cannot be assigned into.
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4619
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    A value count other than one is reported; with no value at all the method
    returns False, with several values the first one is used.  The value must
    match self.oReDisEnum and oInstr.sDisEnum must not be set already, both
    failures return what errorComment() returns.  Returns True on success.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split, complaining unless there is exactly one word.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords == 0:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        return False;
    if cWords > 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));

    # Validate the (first) word.
    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Store it, but never replace an earlier value.
    sPrevious = oInstr.sDisEnum;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sPrevious, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;
    return True;
4648
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a single key of g_kdCpuNames.  A missing or unknown
    name is reported thru errorComment() and that call's return value is
    passed back.  Extra words are reported and the first one is used.

    The name is stored in oInstr.sMinCpu only when nothing is stored there
    yet; a later tag giving a different name is reported and the first name
    is kept.  Returns True in both of these cases.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Nothing to index below, so bail out rather than raising IndexError.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not happen before this check, or a
    # conflicting earlier value can never be noticed.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4678
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The single word 'none' (any case) means an empty list.  Otherwise every
    value must be a key of g_kdCpuIdFlags; all offending values are reported
    thru errorComment() before the method gives up and returns False.  Valid
    values are appended to oInstr.asCpuIds; a value that is already listed is
    reported but does not fail the tag.

    Returns True unless a value was rejected.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry; the string itself cannot be assigned into.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4712
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Requires exactly one word matching self.oReGroupName and refuses to
    replace a group that is already set on the instruction.  Each failure is
    reported thru errorComment() and that call's return value is passed back;
    True is returned once oInstr.sGroup has been set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;

    # It has to look like a group name.
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sGroup, self.oReGroupName.pattern));

    # Store it unless there is a group already.
    sPrevious = oInstr.sGroup;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sPrevious, sGroup,));
    oInstr.sGroup = sGroup;
    return True;
4738
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be exactly one key of g_kdInvalidStyles and the
    instruction must not have a style yet; failures are reported thru
    errorComment() and that call's return value is passed back.  On success
    oInstr.sInvalidStyle is set, @opunused additionally sets oInstr.fUnused
    and @opinvalid sets oInstr.fInvalid, and True is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one, known style word is required.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be given per instruction.
    sPrevious = oInstr.sInvalidStyle;
    if sPrevious is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sPrevious, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags also raise a flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
4776
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as one test.  Its words are scanned from the last
    to the first, starting out as outputs; '->' switches to inputs and '/'
    switches to selectors.  Selectors and assignments are then validated
    against the TestSelector and TestInOut tables.  A test is appended to
    oInstr.aoTests only when the section parsed without a failure being
    recorded in fRc.

    Always returns True; problems are reported thru errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list must be compared by
            # identity: the three lists compare equal whenever they hold the
            # same words (e.g. all still empty), which would hide a repeated
            # or misplaced switcher.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found in the condition is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional, the field's own type is used when it is left out.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),));
                    break; # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we made of it.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4921
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number taken from the tag is compared with the number of tests the
    instruction has so far and a mismatch is reported thru errorComment().
    The tag is then handed to parseTagOpTest() whose return value is
    returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Skip '@optest[' (8 chars) and drop the ']', or just skip '@optest' (7 chars).
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4937
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    None of the arguments are looked at and True is always returned.

    See also @oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
4949
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference matching self.oReStatsName or self.oReFunctionName is
    appended to oInstr.asCopyTests; duplicates and malformed references are
    reported thru errorComment() and skipped.  An empty value is an error
    and returns what errorComment() returns, otherwise True is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The references are only recorded here, nothing is resolved at this point.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asToCopy:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if not (self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference)):
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                              % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
            continue;
        oInstr.asCopyTests.append(sReference);

    return True;
4978
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is in
    there already.  Giving the tag a value is an error that is reported thru
    errorComment(), whose return value is then passed back; otherwise True
    is returned.

    See also @optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Record the instruction once.
    fKnown = oInstr in g_aoOnlyTestInstructions;
    if not fKnown:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
5001
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one key of g_kdXcptTypes and the instruction
    must not have an exception type yet; failures are reported thru
    errorComment() and that call's return value is passed back.  Returns
    True once oInstr.sXcptType has been set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one, known type word is required.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Store it, but never replace an earlier value.
    sPrevious = oInstr.sXcptType;
    if sPrevious is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sPrevious, sType,));
    oInstr.sXcptType = sType;
    return True;
5028
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It it thought it maybe necessary to set it when specifying instructions
    which implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReFunctionName and oInstr.sFunction
    must not be set already; failures are reported thru errorComment() and
    that call's return value is passed back.  Returns True on success.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole value is taken as the name.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sFunction, self.oReFunctionName.pattern));

    # Store it, but never replace an earlier name.
    sPrevious = oInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sPrevious, sFunction,));
    oInstr.sFunction = sFunction;
    return True;
5056
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It it thought it maybe necessary to set it when specifying instructions
    which implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReStatsName and oInstr.sStats must
    not be set already; failures are reported thru errorComment() and that
    call's return value is passed back.  Returns True on success.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole value is taken as the name.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sStats, self.oReStatsName.pattern));

    # Store it, but never replace an earlier name.
    sPrevious = oInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sPrevious, sStats,));
    oInstr.sStats = sStats;
    return True;
5084
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Used to explictily flush the instructions that have been specified.

    Returns what doneInstructions() returns.  If the tag was given a value,
    that is reported thru errorComment() instead and the return value of
    that call is passed back without flushing anything.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5097
5098 ## @}
5099
5100
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Returns False without doing anything if the comment contains neither
    'Opcode' nor an '@'.  Otherwise the comment is split into lines, old
    style 'Opcode ...' comments are passed to parseCommentOldOpcode(), and
    each @tag found is dispatched to its handler in self.dTagHandlers
    together with its value (a list of sections, each a list of lines) and
    the tag's line indexes.  Returns True in that case.

    self.iCommentLine is advanced by one for every leading empty line that
    gets dropped.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text before the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # Process the previous tag.
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop();
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.  Whatever follows the tag word on the line starts its value.
            #
            asTagAndRest = sLine.split(None, 1);
            sCurTag      = asTagAndRest[0].lower();
            asCurSection = [asTagAndRest[1],] if len(asTagAndRest) > 1 else [];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop();
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if sFlatDefault and cOpTags > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5201
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is the text between offStartInvocation and the first
    opening parenthesis; arguments are split at top level commas, with
    nested parentheses and quoted strings kept intact.  When the closing
    parenthesis is not in sInvocation, lines from self.asLines are appended,
    starting at index self.iLine.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).

    A missing parenthesis or a name not matching self.oReMacroName is
    reported thru raiseError(); running out of lines is reported thru
    error() and what has been collected so far is returned.
    """
    # First the name.
    offOpen = sInvocation.find('(', offStartInvocation);
    if offOpen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[offStartInvocation:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.raiseError("invalid macro name '%s'" % (sName,));
    asArgs = [sName, ];

    # Then the arguments, one character at a time.
    iNextLine   = self.iLine;
    cNesting    = 1;            # Parenthesis depth, 1 = directly inside the invocation.
    offCur      = offOpen + 1;
    offArg      = offCur;       # Where the current argument starts.
    offLineBase = 0;            # Where the most recently appended line starts in sInvocation.
    chInQuote   = None;         # The quote character while inside a string, otherwise None.
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Out of text, pull in the next line if there is one.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                break;
            offLineBase  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1; # Skip the escaped character.
            elif ch == chInQuote:
                chInQuote = None;
        elif ch in ('"', '\'',):
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch in (',', ')',):
            if cNesting == 1:
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
5258
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if an opening parenthesis follows
    the name (optionally after whitespace).

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Use startswith() rather than indexing, as there may be nothing but
    # whitespace (or nothing at all) following the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5268
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that only
    returns the first element of its result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
5274
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the order given and stops at the
    first one findAndParseMacroInvocation() finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is not None:
            return asArgs;
    return None;
5284
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added thru addInstruction() if
    the list is empty): mnemonic, operands, encoding, statistics name and
    hints.  Anything not specified on the instruction yet is filled in from
    the macro; mismatches are reported thru error().

    Nothing is applied if sIemHints contains IEMOPHINT_SKIP_PYTHON.  The
    sAsm argument is not used.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # Note: test for substring presence explicitly; str.find() returns -1
                #       (which is true) when missing and 0 (false) for a hit at the start.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not fVexOrXopForm \
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    # Operands beyond those given by the form must be fixed ones.
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub (needs the form table entry, so only for known forms).
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'), (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
5424
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Handles the short IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived here from the lower case mnemonic and the operands
    before the work is passed on to workerIemOpMnemonicEx:
        - 2nd argument: sLower + '_' + operands joined by '_'.
        - 3rd argument: sLower + ' ' + operands joined by ','.
    Without operands, sLower is used as-is for both.

    Returns whatever workerIemOpMnemonicEx returns.
    """
    sDerivedStats = sLower;
    sDerivedAsm   = sLower;
    if asOperands:
        sDerivedStats = sLower + '_' + '_'.join(asOperands);
        sDerivedAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sDerivedStats, sDerivedAsm,
                                      sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5434
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Handles an IEM_MC_BEGIN statement by opening a new MC block.

    sCode is the code string the statement was found in,
    offBeginStatementInCodeStr the statement offset within that string and
    offBeginStatementInLine the statement offset within the source line.

    Returns True.  Problems are reported via raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # We need a function to put the block in and there must not be an open block already.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line the statement is
    # on, so expanded multiline macros don't throw us off.  (rfind returns -1 when
    # there is no preceding newline, which makes the line start at offset zero.)
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;
    #self.debug('cchIndent=%s offLineStart=%s sFunc=%s' % (cchIndent, offLineStart, self.oCurFunction.sName));

    # Create the block and make it the current one.
    oInstr   = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = oInstr, cchIndent = cchIndent);
    self.oCurMcBlock = oMcBlock;

    # Only add it to the global list (and count it) when the preprocessor context
    # matches the host architecture, or when there is nothing to match against.
    try:
        fListIt = not self.aoCppCondStack or not self.sHostArch;
        if not fListIt:
            fListIt = self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine);
        if fListIt:
            g_aoMcBlocks.append(oMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # The instruction and the per-function block index are updated regardless.
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5474
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Cuts a statement block out of a macro expanded line.

    A line that is the result of macro expansion contains several newline
    separated "sub-lines".  This helper, used by workerIemMcEnd and
    workerIemMcDeferToCImplXRet, isolates the sub-lines from the one
    offBegin is on thru the one offEnd is on.  If the first of these does
    not start with sBeginStmt (ignoring blanks), whatever precedes offBegin
    is dropped too.

    Returns list of lines covering offBegin thru offEnd in sRawLine, each
    terminated by a newline.  Note that a chunk ending with a newline
    produces a final entry consisting of only a newline.
    """
    # Drop whatever follows the sub-line offEnd is on (keeping its newline).
    offEol = sRawLine.find('\n', offEnd);
    sChunk = sRawLine[:offEol + 1] if offEol > 0 else sRawLine;

    # Drop the sub-lines preceding the one offBegin is on.
    offSol = sChunk.rfind('\n', 0, offBegin) + 1;
    sChunk = sChunk[offSol:];

    # If something other than the begin statement leads the sub-line, start at offBegin instead.
    if not sChunk.strip().startswith(sBeginStmt):
        sChunk = sChunk[offBegin - offSol:];

    asRet = [];
    for sSubLine in sChunk.split('\n'):
        asRet.append(sSubLine + '\n');
    return asRet;
5495
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines making up the current MC block, cuts off
    whatever follows the 'IEM_MC_END();' statement on the final line,
    completes the block and clears self.oCurMcBlock.

    offEndStatementInLine is the offset of the IEM_MC_END statement in the
    current line.

    Returns True.  Malformed input is reported via raiseError, including the
    case where the final line ends before the '(', ')' and ';' following
    IEM_MC_END have all been seen.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset must be relative to the joined string, so add the
                # length of all the lines preceding the final one.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                  offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by blanks.
    # The offset is range checked so that a statement cut short by the end of the
    # line is reported as a parser error rather than raising an IndexError.
    for chExpected in ('(', ')', ';'):
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5574
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is turned into a complete, single statement MC block
    (created with fDeferToCImpl = True) which is appended to g_aoMcBlocks
    and to the current instruction, if any.

    sCode is the code string the statement was found in,
    offBeginStatementInCodeStr the statement offset within that string,
    offBeginStatementInLine the statement offset within the source line and
    cParams the number in the macro name.

    Returns True.  Problems are reported via raiseError.  self.iLine is
    advanced to the line with the closing parenthesis.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # extractLinesFromMacroExpansionLine produces a trailing entry holding just a
        # newline when the extracted chunk ends with one, so drop blank trailing
        # entries before looking for the terminating semicolon (same as workerIemMcEnd).
        while asLines and asLines[-1].strip() == '':
            asLines.pop();
        assert asLines and asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5642
def workerStartFunction(self, asArgs):
    """
    Starts tracking a new decoder function.

    Decoder functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed invocation, where entry 0
    is the macro name and entry 1 is passed on as the function name.

    Any function currently being tracked is completed first, using the lines
    from its first line up to (but not including) the current one.

    Returns True.
    """
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5658
def checkCodeForMacro(self, sCode, offLine):
    """
    Looks for the macro invocations we care about in a piece of code.

    sCode is the code to scan.  offLine is added to the offsets of the
    IEM_MC_ statements found in sCode before these are handed to the MC
    block workers.

    Returns True if a decoder function definition or an IEM_MC_ prefix was
    found, otherwise False.
    """
    # Everything we are looking for is a macro invocation, so without a
    # parenthesis (beyond the first character) there is nothing to do.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'FNIEMOP_DEF',
                                                            'FNIEMOPRM_DEF',
                                                            'FNIEMOP_STUB',
                                                            'FNIEMOP_STUB_1',
                                                            'FNIEMOP_UD_STUB',
                                                            'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));
        sMacro = asArgs[0];

        # Each current instruction may only be associated with one such macro.
        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub = sMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'FNIEMOP_DEF_1', 'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*:  The L0 variants imply the vex_l_zero hint,
    # so complain if the mnemonic macro didn't specify it and set it.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                            'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                            'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                            'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                            ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,]
        #                     a_fDisHints, a_fIemHints)
        # with <n> = 0..4.  The operands start at argument 6 (the macro name being argument 0) and
        # are followed by the two hint arguments.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[6 + cOperands], asArgs[7 + cOperands],
                                           list(asArgs[6 : 6 + cOperands]));

        # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<n>,] a_fDisHints, a_fIemHints)
        # with <n> = 0..4.  Here the operands start at argument 4.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[4 + cOperands], asArgs[5 + cOperands],
                                         list(asArgs[4 : 4 + cOperands]));

    #
    # IEM_MC_BEGIN + IEM_MC_END + IEM_MC_DEFER_TO_CIMPL_x_RET.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode < 0:
        return False;
    for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
        sWhat    = oMatch.group(1);
        offStmt  = oMatch.start();
        if sWhat == 'END':
            self.workerIemMcEnd(offLine + offStmt);
        elif sWhat == 'BEGIN':
            self.workerIemMcBegin(sCode, offStmt, offLine + offStmt);
        else:
            # The character following 'DEFER_TO_CIMPL_' is the parameter count.
            self.workerIemMcDeferToCImplXRet(sCode, offStmt, offLine + offStmt,
                                             int(sWhat[len('DEFER_TO_CIMPL_')]));
    return True;
5802
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros from self.dMacros.  Call when self.dMacros changes.

    The expression is a single group of alternatives on a word boundary, one
    per macro:  Macros with an argument list (asArgs is not None) must be
    followed by optional blanks and an opening parenthesis, which become part
    of the match, while the others must end on a word boundary.

    When there are no macros, self.oReMacros is set to None.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        sTail = r'\b' if oMacro.asArgs is None else r'\s*\(';
        asAlternatives.append(sName + sTail);
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5823
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Only macros with IEM_MC_BEGIN or IEM_MC_END in the body are recorded (in
    self.dMacros), everything else is ignored.  Continuation lines are
    consumed, advancing self.iLine.

    Returns True on success (including when the macro is ignored), and
    whatever self.error returns if the definition cannot be parsed.
    """
    assert sRest[-1] == '\n';

    #
    # Join up continuation lines, dropping the backslash (and any blanks
    # preceding it) but keeping the newline between the lines.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[0:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split the definition into name, argument list and body, trying the
    # expression for macros with an argument list first and falling back on
    # the one for simple macros.
    #
    # NOTE(review): An empty argument list ('FOO()') results in asArgs = None
    #               here, i.e. the same as for a macro without any argument
    #               list - confirm that this is intentional.
    #
    asArgs = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sParams = oMatch.group(2).strip();
        if sParams:
            asArgs = [sParam.strip() for sParam in sParams.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    # There is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros someone making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and update the macro matching expression.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5884
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the
    macro is forgotten and the result of rebuilding the macro matching
    expression is returned.
    """
    # Isolate the name by cutting at the first slash (quick comment strip) and
    # trimming blanks.  A slash in the very first position is left alone.
    offComment = sRest.find('/');
    sName      = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we keep track of.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5903
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive.

    sDirective is the directive word (without the hash) and sRest what
    follows it on the line.  Continuation lines are consumed, advancing
    self.iLine.

    A PreprocessorConditional is created for the directive.  For #elif it is
    added to the aoElif list of the innermost conditional, otherwise it is
    pushed onto self.aoCppCondStack.

    Returns True.  Problems are reported via raiseError.
    """
    fIsElif = sDirective == 'elif';

    # An #elif needs an open #if that hasn't reached its #else yet.
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    # Join up continuation lines, replacing the backslash + newline (and any
    # blanks preceding them) with a single space.
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[0:-2].rstrip() + ' ' + self.asLines[self.iLine];
        self.iLine += 1;

    # The expression is what remains after removing comments and surrounding blanks.
    sExpr = self.stripComments(sRest).strip();

    # Create the conditional and stash it in the right place.
    try:
        oPreprocCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    aoDst = self.aoCppCondStack[-1].aoElif if fIsElif else self.aoCppCondStack;
    aoDst.append(oPreprocCond);
    return True;
5942
def workerPreprocessorElse(self):
    """
    Handles an #else directive.

    Flags the innermost conditional on self.aoCppCondStack as being in its
    else part.

    Returns True.  A stray or repeated #else is reported via raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oInnermost = self.aoCppCondStack[-1];
    if oInnermost.fInElse:
        self.raiseError('Another #else after #else');
    oInnermost.fInElse = True;
    return True;
5954
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive.

    Drops the innermost conditional from self.aoCppCondStack.

    Returns True.  A stray #endif is reported via raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5964
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive.

    sLine is the whole line and must contain a hash character.

    Returns the result of the worker handling the directive (define, undef,
    if, ifdef, ifndef, elif, else and endif).  Returns False for any other
    directive.
    """
    cchLine = len(sLine);

    # Locate the directive word: It follows the hash and any blanks after that.
    offCur = sLine.find('#');
    assert offCur >= 0;
    offCur += 1;
    while offCur < cchLine and sLine[offCur].isspace():
        offCur += 1;

    offWord = offCur;
    while offCur < cchLine and not sLine[offCur].isspace():
        offCur += 1;
    sDirective = sLine[offWord:offCur];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Advance over the blanks following the word, stopping on the last one so
    # that the tail starts with a single blank (or the newline).
    while offCur + 1 < cchLine and sLine[offCur + 1].isspace():
        offCur += 1;
    sTail = sLine[offCur:];

    # Hand it to the right worker.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
6004
def expandMacros(self, sLine, oMatch):
    """
    Expands a macro we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is the self.oReMacros match for sLine.  Its first group is the
    macro name, followed by optional blanks and an opening parenthesis if
    the macro takes arguments.

    Returns the line with the macro invocation replaced by its expansion.
    Problems are reported via raiseError.
    """
    #
    # Get our bearings.
    #
    offMatch      = oMatch.start();
    offAfterMatch = oMatch.end();
    sName         = oMatch.group(1);
    assert sName == sLine[offMatch : offAfterMatch];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro invocations w/o parameters are done quickly.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offAfterMatch:];

    #
    # Complicated macro with parameters.
    # Collect the parameters by scanning for the closing parenthesis, splitting
    # on commas that aren't inside nested parentheses.  ASSUMES the whole
    # invocation is on the same line!
    #
    cDepth  = 1;
    offArg  = offAfterMatch;
    offScan = offAfterMatch;
    asArgs  = [];
    while cDepth > 0:
        if offScan >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offScan];
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                asArgs.append(sLine[offArg:offScan].strip());
        elif ch == ',' and cDepth == 1:
            asArgs.append(sLine[offArg:offScan].strip());
            offArg = offScan + 1;
        offScan += 1;
    # offScan is now the offset of the first character following the closing parenthesis.

    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offScan:];
6063
def parse(self):
    """
    Parses the given file.

    This is a line oriented state machine, with self.iState being either
    self.kiCode or self.kiCommentMulti.  Known macros are expanded in code
    lines, preprocessor directives go to checkPreprocessorDirective, code is
    handed to checkCodeForMacro and the text of multi-line comments is
    collected in self.sComment and passed to parseComment when the comment
    ends.

    Returns number of errors (the printErrors result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        # Note! self.iLine is incremented right away, so from here on it is the
        #       one-based number of the line being processed.
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded text replaces the original line in self.asLines.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                # Take the C-style comment path unless the first slash starts a
                # C++ comment while we're in code.
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    # Work thru the line, alternating between code and comment state.
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                offHit = sLine.find('/*', offEnd);

                            if offHit >= 0:
                                # Code up to the comment start, then switch to comment state.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment = '';
                                self.iCommentLine = self.iLine;
                                self.iState = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                # The rest of the line is code.
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: Parse it and switch back to code state.
                                self.sComment += sLine[offLine:offHit];
                                self.iState = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                # The comment continues on the next line.
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif ( self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # Wrap up any pending instructions and print the statistics.
    # (The max() avoids dividing by zero when there are no instructions.)
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6169
# Some sanity checking.
def __sanityCheckEFlagsClasses():
    """
    Asserts that every flag listed in SimpleParser.kdEFlagsClasses is a key
    in g_kdEFlagsMnemonics.
    """
    atAllFlags = ( (sClass, sAttrib, sFlag)
                   for sClass, dLists in SimpleParser.kdEFlagsClasses.items()
                   for sAttrib, asFlags in dLists.items()
                   for sFlag in asFlags );
    for sClass, sAttrib, sFlag in atAllFlags:
        assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);
__sanityCheckEFlagsClasses();
6177
## The parsed content of IEMAllInstCommonBodyMacros.h.
g_oParsedCommonBodyMacros = None # type: SimpleParser

def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too, looking
    for it next to sSrcFile first and then next to this script.  The result
    is kept in g_oParsedCommonBodyMacros and handed to the parser of each
    source file.

    Returns a tuple with the parse() result for sSrcFile and the parser
    instance.
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parser trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only supposed to supply macros, so finding any
        # instructions, tags, stubs or MC blocks in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The arguments must follow the order of the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6242
6243
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each entry in asCopyTests is first looked up in g_dAllInstructionsByStat
    and, failing that, in g_dAllInstructionsByFunction.  The tests of every
    instruction found are appended to the aoTests list of the requesting
    instruction.  A reference which cannot be resolved, or which resolves to
    the requesting instruction itself, is reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # A statistics name hit yields a single source, a function name hit possibly several.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSources:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSource in aoSources:
                if oSource != oDstInstr:
                    oDstInstr.aoTests.extend(oSource.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6271
6272
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on
    every instruction in g_aoAllInstructions that has tests but is not in
    the list.  When it is empty nothing is touched.

    Returns 0 (always).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6284
## List of all main instruction files, their default maps and file sets
## (a file set of -1 means the file is included in all sets).
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    # filename, default map, file set
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
6297
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (sFilename, sDefaultMap) pairs.  A
    bare filename (no directory part) which does not exist relative to the
    current directory is looked for in the directory of this script instead.

    After parsing, the @opcopytests requests are carried out, the only-test
    filter is applied and a stub statistics line is printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The divisor is clamped to 1 so that parsing zero
    # instructions (e.g. an empty file list) does not raise ZeroDivisionError.
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6328
6329
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is found by matching its base name, ignoring
    case, against g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.basename(sFilename).lower();
        # First table entry with a matching name wins.
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sBaseName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # Let __parseFilesWorker do the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6351
6352
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the filename and the default map, not the file set.
    asFilesAndDefaultMap = [(sFilename, sDefaultMap) for sFilename, sDefaultMap, _ in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6361
6362
6363#
6364# Generators (may perhaps move later).
6365#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for one instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands, with its arguments laid out
    in fixed columns.

    Returns the entry as a single line (string, no newline).
    """
    if len(oInstr.aoOperands) > 3:
        sMacro       = 'OPVEX';
        cMaxOperands = 4;
    else:
        sMacro       = 'OP';
        cMaxOperands = 3;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operand formats.
    #
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':
            sOperandFmt = sOperandFmt.upper(); ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    iStart    = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dSingleParsers = { 'VEX.ModR/M': 'IDX_ParseModRM,', 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', };
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed', ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asColumns.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding in dSingleParsers:
        asColumns.append(dSingleParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4, IDX_ParseGrp5,
        #   IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16,
        #   IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #   IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # not already covered by a decoder column above.
    cDecoders = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in oInstr.aoOperands[cDecoders:]]);
    # Pad the unused parser slots.
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: the DISOPTYPE_ defines of the hints, in sorted hint order.
    #
    asDisOpTypes = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asDisOpTypes.append(sDefine);
    asColumns.append((' | '.join(asDisOpTypes) or '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its offset, or
    # one space after the previous column if that one overran the offset.
    #
    # Example of the result and the macro it is for:
    #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #      DISOPTYPE_HARMLESS),
    #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        cchPadding = max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += ' ' * cchPadding + sColumn;
    return sLine;
6490
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether the leading and trailing None entries of the table
    should be trimmed off (short table) or not (full table).

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the end, skipping the trailing unused entries.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the start, skipping the leading unused entries.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Output the full table unless the trimming saves at least 1/30th of the
    # entries (integer division, so small tables always qualify as short).
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed < cEntries // 30:
        _ = oMap; # Use this for overriding.
        return (0, cEntries, False);

    return (iFirst, iEnd, True);
6512
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes a DISOPCODE table plus a
    DISOPMAPDESC range record to oDstFile for each instruction map whose
    name starts with "vex".  Maps using the 'byte+pfx' selector are split
    into one table per prefix first.

    Returns exit code: 0 on success, 1 if parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by opcode byte plus prefix are split into separate tables,
    # one for each prefix variant.
    #
    aoDisasmMaps = [];
    for _, oMap in sorted(iter(g_dInstructionMaps.items()),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Note! Only collected at present, not written anywhere by this function.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        cEntriesPerRow  = 0x10 * cEntriesPerByte; # Table entries per 16 opcode bytes.

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append(             'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the entries covered by the block macro (the last one is skipped by
                        # the increment at the end of the loop).  This must follow cEntriesPerByte
                        # rather than assume four entries per byte.
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): an empty list is passed on as if it were an instruction; confirm
                    # that getInstructionsInTableOrder() never produces one.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty table still needs one initializer.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration uses the exact same symbol name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6641
if __name__ == '__main__':
    # Script mode: Generate the disassembler tables on stdout and use the
    # generator's return value as the process exit code.
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
6644
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette