VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103937

Last change on this file since 103937 was 103937, checked in by vboxsync, 13 months ago

VMM/IEM: Implement native emitter for IEM_MC_FETCH_XREG_U128(), bugref:10614

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 324.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103937 2024-03-20 08:50:52Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103937 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## Values of the X86_EFL_XXX constants, keyed by constant name.
## Single bit flags are written as shifts, matching the RT_BIT_32(n) notation.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,    # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to the name of an X86_EFL_XXX constant.  A
## leading '!' on the constant name marks the mnemonic as denoting the flag
## being clear rather than set.
##
## NOTE(review): x86 sets PF when the result has even parity, so the usual
##               debugger notation is 'pe' = PF set and 'po' = PF clear.  The
##               'po'/'pe' entries below look inverted relative to that -
##               confirm against the users of this table before changing.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Each entry is a single bit, written as a shift matching RT_BIT_32(n).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
152
## Constants and values for CR4.
## Each entry is a single bit, written as a shift matching RT_BIT_32(n).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
174
## XSAVE components (XCR0).
## Single components are written as bit shifts, the combined masks as hex.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     0x000000c4,  # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6,  # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## Every location name maps to its own (initially empty) list.
g_kdOpLocations = {
    sLocation: [] for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv
        'imm8reg',  ## byte immediate with register specified in bits 4 thru 7 (vpblendvb, vblendvps, vblendvpd).

        # Fixed registers.
        'AL',
        'rAX',
        'rDX',
        'CL',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
        'XMM0',

        # Fixed values.
        '1',
    )
};
220
## \@op[1-4] types
##
## Maps an operand type name to a 5-tuple describing it.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## The _RO, _WO and _RW suffixed names carry the same tuple as their unsuffixed
## counterpart (presumably read-only / write-only / read-write access variants
## - TODO confirm).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ew_WO':        ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wqq':          ( 'IDX_UseModRM',       'rm',     '%Wqq', 'Wqq',     'RM',    ),
    'Wqq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wqq', 'Wqq',     'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uqq':          ( 'IDX_UseModRM',       'rm',     '%Uqq', 'Uqq',     'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vqq':          ( 'IDX_UseModRM',       'reg',    '%Vqq', 'Vqq',     '',      ),
    'Vqq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vqq', 'Vqq',     '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hdq':          ( 'IDX_UseModRM',       'vvvv',   '%Hdq', 'Hdq',     'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hqq':          ( 'IDX_UseModRM',       'vvvv',   '%Hqq', 'Hqq',     'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Registers encoded as immediates.
    'Lx':           ( 'IDX_ParseImmByte',   'imm8reg', '%Lx', 'Lx',      '',      ),

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',     'cl',   'REG_CL',  '',      ),
    'REG_XMM0':     ( 'IDX_ParseFixedReg',  'XMM0',   'xmm0', 'REG_XMM0','',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),

    # Fixed values.
    '1':            ( '',                   '1',      '1',    '1',       '',      ),
};
386
387# IDX_ParseFixedReg
388# IDX_ParseVexDest
389
390
## IEMFORM_XXX mappings.
##
## Maps the form name to a 3-tuple:
##    - 0: the encoding (one of the g_kdEncodings keys).
##    - 1: list with the location of each operand (g_kdOpLocations keys), or
##         None when the form does not dictate any ('FIXED').
##    - 2: the opcodesub value implied by the form, empty string if none.
##
## NOTE(review): 'M_REG', 'M_MEM', 'VEX_M_REG' and 'VEX_M_MEM' have an empty
##               opcodesub value whereas all the other _REG/_MEM forms use
##               '11 mr/reg' / '!11 mr/reg' - confirm that this is intentional.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]                      opcodesub      ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],                      '',            ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],                      '11 mr/reg',   ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],                      '!11 mr/reg',  ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],               '',            ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],               '11 mr/reg',   ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],               '!11 mr/reg',  ),
    'RM0':          ( 'ModR/M',     [ 'reg', 'rm', 'XMM0' ],              '',            ),
    'RM0_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'XMM0' ],              '11 mr/reg',   ),
    'RM0_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'XMM0' ],              '!11 mr/reg',  ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],                      '',            ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],                      '11 mr/reg',   ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],                      '!11 mr/reg',  ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],               '',            ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],               '11 mr/reg',   ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],               '!11 mr/reg',  ),
    'M':            ( 'ModR/M',     [ 'rm', ],                            '',            ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                            '',            ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                            '',            ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],                        '',            ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],                       '',            ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],                      '',            ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],                      '11 mr/reg',   ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],                      '!11 mr/reg',  ),
    'R':            ( 'ModR/M',     [ 'reg', ],                           '',            ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],                      '',            ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                      '11 mr/reg',   ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                      '!11 mr/reg',  ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],                      '',            ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                      '11 mr/reg',   ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                      '!11 mr/reg',  ),
    'VEX_MRI':      ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],               '',            ),
    'VEX_MRI_REG':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],               '11 mr/reg',   ),
    'VEX_MRI_MEM':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],               '!11 mr/reg',  ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                            '' ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                            '' ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                            '' ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                           '' ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],              '',            ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],              '11 mr/reg',   ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],              '!11 mr/reg',  ),
    'VEX_RVMI':     ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],       '',            ),
    'VEX_RVMI_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],       '11 mr/reg',   ),
    'VEX_RVMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],       '!11 mr/reg',  ),
    'VEX_RVMR':     ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm8reg' ],   '',            ),
    'VEX_RVMR_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm8reg' ],   '11 mr/reg',   ),
    'VEX_RVMR_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm8reg' ],   '!11 mr/reg',  ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],              '',            ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],              '11 mr/reg',   ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],              '!11 mr/reg',  ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],               '',            ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],               '11 mr/reg',   ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],               '!11 mr/reg',  ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],              '',            ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],              '11 mr/reg',   ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],              '!11 mr/reg',  ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                     '',            ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                     '11 mr/reg',   ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                     '!11 mr/reg',  ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],              '',            ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],              '11 mr/reg',   ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],              '!11 mr/reg',  ),

    'FIXED':        ( 'fixed',      None,                                 '',            ),
};
459
## \@oppfx values.
## Every prefix name maps to its own (initially empty) list.
g_kdPrefixes = {
    sPrefix: [] for sPrefix in (
        'none',
        '0x66',
        '0xf3',
        '0xf2',
        '!0xf3',    # special case for bsf/tzcnt
    )
};
468
## Special \@opcode tag values.
## Every value maps to its own (initially empty) list.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
478
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## For non-alias entries the first value must equal the key itself; this was
## not the case for 'vex.l=1', which resolved to 'vex.l=0'.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    'vex.l=1':              [ 'vex.l=1',            'L1',       ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',         ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',         ],
};
501
## Valid values for \@openc
## Each encoding maps to a single entry list holding the BS3CG1ENC_XXX name,
## or None if there is none.
g_kdEncodings = {
    sEncoding: [ sBs3Cg1Enc, ] for sEncoding, sBs3Cg1Enc in (
        ( 'ModR/M',     'BS3CG1ENC_MODRM',     ),   ##< ModR/M
        ( 'VEX.ModR/M', 'BS3CG1ENC_VEX_MODRM', ),   ##< VEX...ModR/M
        ( 'fixed',      'BS3CG1ENC_FIXED',     ),   ##< Fixed encoding (address, registers, unused, etc).
        ( 'VEX.fixed',  'BS3CG1ENC_VEX_FIXED', ),   ##< VEX + fixed encoding (address, registers, unused, etc).
        ( 'prefix',     None,                  ),   ##< Prefix
    )
};
510
## \@opunused, \@opinvalid, \@opinvlstyle
## Every style name maps to its own (initially empty) list.
g_kdInvalidStyles = {
    sStyle: [] for sStyle in (
        'immediate',                ##< CPU stops decoding immediately after the opcode.
        'vex.modrm',                ##< VEX+ModR/M, everyone.
        'intel-modrm',              ##< Intel decodes ModR/M.
        'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
        'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
    )
};
520
## Known CPU names, each mapping to an empty tuple.
g_kdCpuNames = {
    sCpuName: () for sCpuName in ( '8086', '80186', '80286', '80386', '80486', )
};
528
## \@opcpuid
##
## Maps the lower case feature name to the name of the X86_CPUID_XXX constant
## for the corresponding CPUID feature bit.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the constant of the
##       neighbouring CPUID.1:ECX bit (DTES64, CPLDS and TM2 respectively).
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',     # CPUID.1:ECX bit 1
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',    # CPUID.1:ECX bit 3
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',        # CPUID.1:ECX bit 6
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
574
## \@ophints values.
##
## Maps the hint name to the name of the corresponding DISOPTYPE_XXX constant.
## Hints mapping to an empty string have no DISOPTYPE_XXX constant listed here.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_one':                 '',                                    ##< VEX.L must be 1.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
618
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every exception type name maps to its own (initially empty) list.
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',  # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
654
655
656def _isValidOpcodeByte(sOpcode):
657 """
658 Checks if sOpcode is a valid lower case opcode byte.
659 Returns true/false.
660 """
661 if len(sOpcode) == 4:
662 if sOpcode[:2] == '0x':
663 if sOpcode[2] in '0123456789abcdef':
664 if sOpcode[3] in '0123456789abcdef':
665 return True;
666 return False;
667
668
669class InstructionMap(object):
670 """
671 Instruction map.
672
673 The opcode map provides the lead opcode bytes (empty for the one byte
674 opcode map). An instruction can be member of multiple opcode maps as long
675 as it uses the same opcode value within the map (because of VEX).
676 """
677
678 kdEncodings = {
679 'legacy': [],
680 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
681 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
682 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
683 'xop8': [], ##< XOP prefix with vvvvv = 8
684 'xop9': [], ##< XOP prefix with vvvvv = 9
685 'xop10': [], ##< XOP prefix with vvvvv = 10
686 };
687 ## Selectors.
688 ## 1. The first value is the number of table entries required by a
689 ## decoder or disassembler for this type of selector.
690 ## 2. The second value is how many entries per opcode byte if applicable.
691 kdSelectors = {
692 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
693 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
694 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
695 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
696 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
697 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
698 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
699 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
700 };
701
702 ## Define the subentry number according to the Instruction::sPrefix
703 ## value for 'byte+pfx' selected tables.
704 kiPrefixOrder = {
705 'none': 0,
706 '0x66': 1,
707 '0xf3': 2,
708 '0xf2': 3,
709 };
710
711 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
712 sEncoding = 'legacy', sDisParse = None):
713 assert sSelector in self.kdSelectors;
714 assert sEncoding in self.kdEncodings;
715 if asLeadOpcodes is None:
716 asLeadOpcodes = [];
717 else:
718 for sOpcode in asLeadOpcodes:
719 assert _isValidOpcodeByte(sOpcode);
720 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
721
722 self.sName = sName;
723 self.sIemName = sIemName;
724 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
725 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
726 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
727 self.aoInstructions = [] # type: Instruction
728 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
729
730 def copy(self, sNewName, sPrefixFilter = None):
731 """
732 Copies the table with filtering instruction by sPrefix if not None.
733 """
734 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
735 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
736 else self.sSelector,
737 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
738 if sPrefixFilter is None:
739 oCopy.aoInstructions = list(self.aoInstructions);
740 else:
741 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
742 return oCopy;
743
def getTableSize(self):
    """
    Returns the number of table entries, i.e. the first value of the
    kdSelectors entry for the selector of this map.
    """
    aiSelectorInfo = self.kdSelectors[self.sSelector];
    return aiSelectorInfo[0];
749
def getEntriesPerByte(self):
    """
    Returns the number of table entries per opcode byte, i.e. the second
    value of the kdSelectors entry for the selector of this map.

    This is 4 for 'byte+pfx' and 1 for all the other selectors.
    """
    aiSelectorInfo = self.kdSelectors[self.sSelector];
    return aiSelectorInfo[1];
758
def getInstructionIndex(self, oInstr):
    """
    Returns the table index for the instruction.

    The index is derived from oInstr.getOpcodeByte() according to the map
    selector (self.sSelector):
        - 'byte':       The opcode byte.
        - 'byte+pfx':   The opcode byte times four plus the kiPrefixOrder
                        value for oInstr.sPrefix.
        - '/r':         Bits 3 thru 5 (modrm.reg).
        - 'mod /r':     Bits 3 thru 7 (modrm.reg and modrm.mod).
        - 'memreg /r':  Bits 3 thru 5, plus 8 if bits 6 and 7 are both set.
        - '!11 /r':     Bits 3 thru 5, bits 6 and 7 must not both be set.
        - '11 /r':      Bits 3 thru 5, bits 6 and 7 must both be set.
        - '11':         Bits 0 thru 5, bits 6 and 7 must both be set.

    Asserts if the opcode does not fit the selector or the selector is
    unknown; returns -1 in the latter case when assertions are disabled.
    """
    bOpcode = oInstr.getOpcodeByte();

    # The byte selectors are simple.  We need a full opcode byte and need just return it.
    if self.sSelector == 'byte':
        assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
        return bOpcode;

    # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
    if self.sSelector == 'byte+pfx':
        assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
        iPrefix = self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
        assert iPrefix >= 0;
        return bOpcode * 4 + iPrefix;

    # The other selectors needs masking and shifting.
    iReg = (bOpcode >> 3) & 0x7;

    # The mod field is the two top bits, so mod == 0y11 means that the masked
    # value is 0xc0.  (Comparing it with 0xc can never match.)
    fModIs11 = (bOpcode & 0xc0) == 0xc0;

    if self.sSelector == '/r':
        return iReg;

    if self.sSelector == 'mod /r':
        return (bOpcode >> 3) & 0x1f;

    if self.sSelector == 'memreg /r':
        return iReg | (int(fModIs11) << 3);

    if self.sSelector == '!11 /r':
        assert not fModIs11, str(oInstr);
        return iReg;

    if self.sSelector == '11 /r':
        assert fModIs11, str(oInstr);
        return iReg;

    if self.sSelector == '11':
        assert fModIs11, str(oInstr);
        return bOpcode & 0x3f;

    assert False, self.sSelector;
    return -1;
800
def getInstructionsInTableOrder(self):
    """
    Returns the instructions arranged in table order.

    The result is a list with getTableSize() entries.  An entry is None
    when no instruction maps to that index, a single instruction when
    exactly one does, and a list of instructions when several share the
    index (e.g. when a prefix such as REX.W or the CPU selects between
    different instructions in the same place).

    Instructions without an opcode (sOpcode empty or None) are skipped.
    """
    cEntries = self.getTableSize();
    aoResult = [None] * cEntries;

    for oCur in self.aoInstructions:
        if not oCur.sOpcode:
            continue;

        iEntry = self.getInstructionIndex(oCur);
        assert iEntry < cEntries, str(iEntry);

        # First occupant is stored directly, the second converts the slot into a list.
        oSlot = aoResult[iEntry];
        if isinstance(oSlot, list):
            oSlot.append(oCur);
        elif oSlot is not None:
            aoResult[iEntry] = [oSlot, oCur];
        else:
            aoResult[iEntry] = oCur;

    return aoResult;
832
833
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per underscore separated
    word of self.sName: 'm', 'r' and two digit lower case hex words are
    appended as '_<word>', any other word gets 'grp'/'map' turned into
    'Grp'/'Map' and its first character upper cased.
    """
    asPieces = ['g_aDisas'];
    for sPart in self.sName.split('_'):
        if sPart in ('m', 'r'):
            # Suffixes indicating modrm.mod==mem ('m') and modrm.mod==reg ('r').
            asPieces.append('_' + sPart);
        elif len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart):
            asPieces.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asPieces);
851
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name with 'g_aDisas' replaced by 'g_Disas' and
    'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return sTableName.replace('g_aDisas', 'g_Disas') + 'Range';
857
def isVexMap(self):
    """ Returns True if the map's encoding name starts with 'vex', i.e. it is a VEX map. """
    sEncoding = self.sEncoding;
    return sEncoding.startswith('vex');
861
862
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number string,
        optionally preceded by '+' or '-' (which forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class always returns the first form.  The byte array is little endian.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() handles arbitrarily large values
        # on both python 2 and 3, so there is no dependency on the python 2
        # only 'long' builtin.  Only conversion errors are translated; anything
        # else is a programming error and must not be reported as a bad value.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string ('%x' gives neither '0x' prefix nor 'L'
        # suffix on any python version).  Negative values needs to be manually
        # converted to two's complement: ~(-n - 1) == n, done digit by digit.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;      # Make sure the sign bit is set in the top digit.

        # Find the smallest normal size that fits, or if too big for any of
        # them, round up to a multiple of the largest one.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        assert isinstance(cNaturalDigits, int)

        # Pad to the natural size, extending with zeros or ones depending on the sign.
        if cNaturalDigits != cDigits:
            sPadDigit = '0' if iValue >= 0 else 'f';
            sHex = (sPadDigit * (cNaturalDigits - cDigits)) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
987
988
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.
    """

    ## Mnemonics that stands for a flag being clear.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Mnemonics mapping to a constant prefixed by '!' contribute to the
        clear mask, all others to the set mask.

        Returns ((fSignExtend, abSet),) when nothing is to be cleared,
        otherwise the pair ((fSignExtend, abClear), (fSignExtend, abSet)).

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether it contains any of the
        kdZeroValueFlags mnemonics.

        The value is split on ',' exactly like get() does and whole mnemonics
        are compared.  A substring search would misfire on any longer mnemonic
        that merely contains one of the two letter ones (e.g. 'pl' in 'iopl').
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
1024
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (used for CR0 and similar).

    Values are comma separated flag names.  Each name is upper cased and
    prefixed with sConstantPrefix before being looked up in
    kdConstantsAndValues; the resulting values are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts the comma separated flag names in sValue, returning the
        TestType.get() result for the combined value.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
1045
1046
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' relies on the TestType default of fUnsigned=True and
    #               thus currently behaves exactly like 'uint' - confirm this is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', unlike xmm0..xmm14.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp one of kasOperators; sValue
        and sType are stored as given.  All four must be strings.  Checked by
        assertions only.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1312
1313
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a '<variable><op><value>' condition, where the variable and
    its value must be found in kdVariables and the operator in kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     { '0': 'ring_0', '1': 'ring_1', '2': 'ring_2', '3': 'ring_3',
                      '0..2': 'ring_0_thru_2', '1..3': 'ring_1_thru_3', },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     { 'real': 'mode_real', 'prot': 'mode_prot', 'long': 'mode_long', 'v86': 'mode_v86',
                      'smm': 'mode_smm', 'vmx': 'mode_vmx', 'svm': 'mode_svm', },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bitness.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Stores the selector after asserting that the variable, the operator
        and the value (for that variable) are all known.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1410
1411
1412class InstructionTest(object):
1413 """
1414 Instruction test.
1415 """
1416
1417 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1418 self.oInstr = oInstr # type: InstructionTest
1419 self.aoInputs = [] # type: List[TestInOut]
1420 self.aoOutputs = [] # type: List[TestInOut]
1421 self.aoSelectors = [] # type: List[TestSelector]
1422
1423 def toString(self, fRepr = False):
1424 """
1425 Converts it to string representation.
1426 """
1427 asWords = [];
1428 if self.aoSelectors:
1429 for oSelector in self.aoSelectors:
1430 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1431 asWords.append('/');
1432
1433 for oModifier in self.aoInputs:
1434 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1435
1436 asWords.append('->');
1437
1438 for oModifier in self.aoOutputs:
1439 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1440
1441 if fRepr:
1442 return '<' + ' '.join(asWords) + '>';
1443 return ' '.join(asWords);
1444
1445 def __str__(self):
1446 """ Provide string represenation. """
1447 return self.toString(False);
1448
1449 def __repr__(self):
1450 """ Provide unambigious string representation. """
1451 return self.toString(True);
1452
class Operand(object):
    """
    Instruction operand.

    Described by where it is found (a g_kdOpLocations key) and its type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with E, G or M. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1467
1468
1469
1470class Instruction(object): # pylint: disable=too-many-instance-attributes
1471 """
1472 Instruction.
1473 """
1474
1475 def __init__(self, sSrcFile, iLine):
1476 ## @name Core attributes.
1477 ## @{
1478 self.oParent = None # type: Instruction
1479 self.sMnemonic = None;
1480 self.sBrief = None;
1481 self.asDescSections = [] # type: List[str]
1482 self.aoMaps = [] # type: List[InstructionMap]
1483 self.aoOperands = [] # type: List[Operand]
1484 self.sPrefix = None; ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
1485 self.sOpcode = None # type: str
1486 self.sSubOpcode = None # type: str
1487 self.sEncoding = None;
1488 self.asFlTest = None;
1489 self.asFlModify = None;
1490 self.asFlUndefined = None;
1491 self.asFlSet = None;
1492 self.asFlClear = None;
1493 self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
1494 self.sDisEnum = None; ##< OP_XXXX value. Default is based on the uppercased mnemonic.
1495 self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
1496 self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction.
1497 self.aoTests = [] # type: List[InstructionTest]
1498 self.sMinCpu = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
1499 self.oCpuExpr = None; ##< Some CPU restriction expression...
1500 self.sGroup = None;
1501 self.fUnused = False; ##< Unused instruction.
1502 self.fInvalid = False; ##< Invalid instruction (like UD2).
1503 self.sInvalidStyle = None; ##< Invalid behviour style (g_kdInvalidStyles),
1504 self.sXcptType = None; ##< Exception type (g_kdXcptTypes).
1505 ## @}
1506
1507 ## @name Implementation attributes.
1508 ## @{
1509 self.sStats = None;
1510 self.sFunction = None;
1511 self.fStub = False;
1512 self.fUdStub = False;
1513 ## @}
1514
1515 ## @name Decoding info
1516 ## @{
1517 self.sSrcFile = sSrcFile;
1518 self.iLineCreated = iLine;
1519 self.iLineCompleted = None;
1520 self.cOpTags = 0;
1521 self.iLineFnIemOpMacro = -1;
1522 self.iLineMnemonicMacro = -1;
1523 ## @}
1524
1525 ## @name Intermediate input fields.
1526 ## @{
1527 self.sRawDisOpNo = None;
1528 self.asRawDisParams = [];
1529 self.sRawIemOpFlags = None;
1530 self.sRawOldOpcodes = None;
1531 self.asCopyTests = [];
1532 ## @}
1533
1534 ## All the MC blocks associated with this instruction.
1535 self.aoMcBlocks = [] # type: List[McBlock]
1536
1537 def toString(self, fRepr = False):
1538 """ Turn object into a string. """
1539 aasFields = [];
1540
1541 aasFields.append(['opcode', self.sOpcode]);
1542 if self.sPrefix:
1543 aasFields.append(['prefix', self.sPrefix]);
1544 aasFields.append(['mnemonic', self.sMnemonic]);
1545 for iOperand, oOperand in enumerate(self.aoOperands):
1546 aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
1547 if self.aoMaps: aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
1548 aasFields.append(['encoding', self.sEncoding]);
1549 if self.dHints: aasFields.append(['hints', ','.join(self.dHints.keys())]);
1550 aasFields.append(['disenum', self.sDisEnum]);
1551 if self.asCpuIds: aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
1552 aasFields.append(['group', self.sGroup]);
1553 if self.fUnused: aasFields.append(['unused', 'True']);
1554 if self.fInvalid: aasFields.append(['invalid', 'True']);
1555 aasFields.append(['invlstyle', self.sInvalidStyle]);
1556 aasFields.append(['fltest', self.asFlTest]);
1557 aasFields.append(['flmodify', self.asFlModify]);
1558 aasFields.append(['flundef', self.asFlUndefined]);
1559 aasFields.append(['flset', self.asFlSet]);
1560 aasFields.append(['flclear', self.asFlClear]);
1561 aasFields.append(['mincpu', self.sMinCpu]);
1562 aasFields.append(['stats', self.sStats]);
1563 aasFields.append(['sFunction', self.sFunction]);
1564 if self.fStub: aasFields.append(['fStub', 'True']);
1565 if self.fUdStub: aasFields.append(['fUdStub', 'True']);
1566 if self.cOpTags: aasFields.append(['optags', str(self.cOpTags)]);
1567 if self.iLineFnIemOpMacro != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
1568 if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);
1569
1570 sRet = '<' if fRepr else '';
1571 for sField, sValue in aasFields:
1572 if sValue is not None:
1573 if len(sRet) > 1:
1574 sRet += '; ';
1575 sRet += '%s=%s' % (sField, sValue,);
1576 if fRepr:
1577 sRet += '>';
1578
1579 return sRet;
1580
1581 def __str__(self):
1582 """ Provide string represenation. """
1583 return self.toString(False);
1584
1585 def __repr__(self):
1586 """ Provide unambigious string representation. """
1587 return self.toString(True);
1588
1589 def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
1590 """
1591 Makes a copy of the object for the purpose of putting in a different map
1592 or a different place in the current map.
1593 """
1594 oCopy = Instruction(self.sSrcFile, self.iLineCreated);
1595
1596 oCopy.oParent = self;
1597 oCopy.sMnemonic = self.sMnemonic;
1598 oCopy.sBrief = self.sBrief;
1599 oCopy.asDescSections = list(self.asDescSections);
1600 oCopy.aoMaps = [oMap,] if oMap else list(self.aoMaps);
1601 oCopy.aoOperands = list(self.aoOperands); ## Deeper copy?
1602 oCopy.sPrefix = sPrefix if sPrefix else self.sPrefix;
1603 oCopy.sOpcode = sOpcode if sOpcode else self.sOpcode;
1604 oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;
1605 oCopy.sEncoding = self.sEncoding;
1606 oCopy.asFlTest = self.asFlTest;
1607 oCopy.asFlModify = self.asFlModify;
1608 oCopy.asFlUndefined = self.asFlUndefined;
1609 oCopy.asFlSet = self.asFlSet;
1610 oCopy.asFlClear = self.asFlClear;
1611 oCopy.dHints = dict(self.dHints);
1612 oCopy.sDisEnum = self.sDisEnum;
1613 oCopy.asCpuIds = list(self.asCpuIds);
1614 oCopy.asReqFeatures = list(self.asReqFeatures);
1615 oCopy.aoTests = list(self.aoTests); ## Deeper copy?
1616 oCopy.sMinCpu = self.sMinCpu;
1617 oCopy.oCpuExpr = self.oCpuExpr;
1618 oCopy.sGroup = self.sGroup;
1619 oCopy.fUnused = self.fUnused;
1620 oCopy.fInvalid = self.fInvalid;
1621 oCopy.sInvalidStyle = self.sInvalidStyle;
1622 oCopy.sXcptType = self.sXcptType;
1623
1624 oCopy.sStats = self.sStats;
1625 oCopy.sFunction = self.sFunction;
1626 oCopy.fStub = self.fStub;
1627 oCopy.fUdStub = self.fUdStub;
1628
1629 oCopy.iLineCompleted = self.iLineCompleted;
1630 oCopy.cOpTags = self.cOpTags;
1631 oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
1632 oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;
1633
1634 oCopy.sRawDisOpNo = self.sRawDisOpNo;
1635 oCopy.asRawDisParams = list(self.asRawDisParams);
1636 oCopy.sRawIemOpFlags = self.sRawIemOpFlags;
1637 oCopy.sRawOldOpcodes = self.sRawOldOpcodes;
1638 oCopy.asCopyTests = list(self.asCopyTests);
1639
1640 return oCopy;
1641
1642 def getOpcodeByte(self):
1643 """
1644 Decodes sOpcode into a byte range integer value.
1645 Raises exception if sOpcode is None or invalid.
1646 """
1647 if self.sOpcode is None:
1648 raise Exception('No opcode byte for %s!' % (self,));
1649 sOpcode = str(self.sOpcode); # pylint type confusion workaround.
1650
1651 # Full hex byte form.
1652 if sOpcode[:2] == '0x':
1653 return int(sOpcode, 16);
1654
1655 # The /r form:
1656 if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
1657 return int(sOpcode[1:]) << 3;
1658
1659 # The 11/r form:
1660 if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
1661 return (int(sOpcode[-1:]) << 3) | 0xc0;
1662
1663 # The !11/r form (returns mod=1):
1664 ## @todo this doesn't really work...
1665 if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
1666 return (int(sOpcode[-1:]) << 3) | 0x80;
1667
1668 raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));
1669
1670 @staticmethod
1671 def _flagsToIntegerMask(asFlags):
1672 """
1673 Returns the integer mask value for asFlags.
1674 """
1675 uRet = 0;
1676 if asFlags:
1677 for sFlag in asFlags:
1678 sConstant = g_kdEFlagsMnemonics[sFlag];
1679 assert sConstant[0] != '!', sConstant
1680 uRet |= g_kdX86EFlagsConstants[sConstant];
1681 return uRet;
1682
1683 def getTestedFlagsMask(self):
1684 """ Returns asFlTest into a integer mask value """
1685 return self._flagsToIntegerMask(self.asFlTest);
1686
1687 def getModifiedFlagsMask(self):
1688 """ Returns asFlModify into a integer mask value """
1689 return self._flagsToIntegerMask(self.asFlModify);
1690
1691 def getUndefinedFlagsMask(self):
1692 """ Returns asFlUndefined into a integer mask value """
1693 return self._flagsToIntegerMask(self.asFlUndefined);
1694
1695 def getSetFlagsMask(self):
1696 """ Returns asFlSet into a integer mask value """
1697 return self._flagsToIntegerMask(self.asFlSet);
1698
1699 def getClearedFlagsMask(self):
1700 """ Returns asFlClear into a integer mask value """
1701 return self._flagsToIntegerMask(self.asFlClear);
1702
1703 @staticmethod
1704 def _flagsToC(asFlags):
1705 """
1706 Returns asFlags converted to X86_EFL_XXX ored together C-style.
1707 """
1708 if asFlags:
1709 asRet = [];
1710 for sFlag in asFlags:
1711 sConstant = g_kdEFlagsMnemonics[sFlag];
1712 assert sConstant[0] != '!', sConstant
1713 asRet.append(sConstant);
1714 return ' | '.join(asRet);
1715 return '0';
1716
1717 def getTestedFlagsCStyle(self):
1718 """ Returns asFlTest as C constants ored together. """
1719 return self._flagsToC(self.asFlTest);
1720
1721 def getModifiedFlagsCStyle(self):
1722 """ Returns asFlModify as C constants ored together. """
1723 return self._flagsToC(self.asFlModify);
1724
1725 def getUndefinedFlagsCStyle(self):
1726 """ Returns asFlUndefined as C constants ored together. """
1727 return self._flagsToC(self.asFlUndefined);
1728
1729 def getSetFlagsCStyle(self):
1730 """ Returns asFlSet as C constants ored together. """
1731 return self._flagsToC(self.asFlSet);
1732
def getClearedFlagsCStyle(self):
    """ Converts the cleared flags (asFlClear) to C constants ORed together. """
    asFlags = self.asFlClear;
    return self._flagsToC(asFlags);
1736
def onlyInVexMaps(self):
    """
    Checks whether every map in aoMaps is a VEX map.

    Returns True if so, False if any map is not a VEX map or if there are
    no maps at all.
    """
    if not self.aoMaps:
        return False;
    return all(oMap.isVexMap() for oMap in self.aoMaps);
1745
1746
1747
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions (see the type comment).
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1759
## Instruction maps.
## One InstructionMap per opcode map or opcode group.  The first argument is
## the map name, which is the key used in g_dInstructionMaps below.
g_aoInstructionMaps = [
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (oMap.sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps indexed by oMap.sIemName.  A later map with the same
## sIemName replaces an earlier one in this dictionary.
## NOTE(review): many maps above are created without an explicit second
## argument; what sIemName they end up with (and whether they collide here)
## depends on InstructionMap.__init__ - confirm.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1825
1826
1827#
1828# Decoder functions.
1829#
1830
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function is defined and the raw source lines it
    is made up of.  This is mainly used when searching for the scope of
    variables used in microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of, empty until complete() is called.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        Must only be called once per function (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1853
1854
1855#
1856# "Microcode" statements and blocks
1857#
1858
class McStmt(object):
    """
    A single statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters (joined with ', ' when rendered).
        self.oUser    = None;       ##< Initially None; not touched by this class.

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as 'NAME(param, ...);' on a line of its own,
        indented by cchIndent spaces.
        """
        sIndent = ' ' * cchIndent;
        sParams = ', '.join(self.asParams);
        return sIndent + self.sName + '(' + sParams + ');\n';

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders each statement in the list and returns the concatenated result.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list whose name is in dNames,
        descending into the branches of conditional statements (the IF-branch
        before the ELSE-branch).  Returns None if nothing matches.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if isinstance(oStmt, McStmtCond):
                oNested = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                           or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
                if oNested:
                    return oNested;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches the given list of statements (including the branches of
        conditional statements) for the names in the dNames dictionary, adding
        each name found to dRet with an occurrence count.

        Returns the total number of hits.
        """
        cTotal = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cTotal += 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cTotal += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cTotal;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++', i.e. it is a C++ statement. """
        return self.sName.startswith('C++');
1921
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).

    Holds private copies of the IF-branch and ELSE-branch statement lists.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        self.aoIfBranch            = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        self.oIfBranchAnnotation   = None;  ##< User specific IF-branch annotation.
        self.oElseBranchAnnotation = None;  ##< User specific ELSE-branch annotation.
        ## '_NATIVE' for IEM_MC_NATIVE_IF so the matching IEM_MC_NATIVE_ELSE/ENDIF
        ## are rendered, otherwise empty.
        self.sNativeInfix          = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more spaces.
        The ELSE part is only emitted when the else branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asParts);
1942
class McStmtNativeIf(McStmtCond):
    """
    IEM_MC_NATIVE_IF

    The single statement parameter is the architecture list joined by '|',
    or '0' when the list is empty.
    """
    def __init__(self, sName, asArchitectures):
        if asArchitectures:
            sParam = '|'.join(asArchitectures);
        else:
            sParam = '0';
        McStmtCond.__init__(self, sName, [sParam,]);
        self.asArchitectures = asArchitectures;
1948
class McStmtVar(McStmt):
    """
    Variable declaration statement: IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN,
    IEM_MC_LOCAL_CONST.
    """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        self.sVarName = sVarName;   ##< The variable name.
        self.sType    = sType;      ##< The variable type.
        self.sValue   = sValue;     ##< None if no assigned / const value.
1956
class McStmtArg(McStmtVar):
    """
    Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST,
    IEM_MC_ARG_LOCAL_REF.

    The constant value, if any, is stored as the McStmtVar value (sValue).
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        self.iArg     = iArg;       ##< The argument number as given in the statement.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        assert sRefType in ('none', 'local');
1965
class McStmtCall(McStmt):
    """
    Call statement: IEM_MC_CALL_*

    iFnParam is the index into asParams of the function being called; the
    call arguments start at the following index.  iRcNameParam is the index
    of the parameter receiving the return code, negative if there is none.
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index of the function in asParams.
        self.idxParams = iFnParam + 1;          ##< Index of the first parameter following the function.
        self.sFn       = asParams[iFnParam];    ##< The function parameter value.
        ## The asParams entry at iRcNameParam, None if iRcNameParam is negative.
        self.iRcName   = asParams[iRcNameParam] if iRcNameParam >= 0 else None;
1974
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The parameters are the tested and the modified flags of the given
    instruction, both rendered as C constants ORed together.
    """
    def __init__(self, oInstruction):
        asFlagParams = [oInstruction.getTestedFlagsCStyle(), oInstruction.getModifiedFlagsCStyle(),];
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', asFlagParams);
1982
1983
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is stored as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' or '// C++ normal' annotation.
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code, appending the annotation comment before every
        newline (including any newlines embedded in the code itself).
        """
        sIndent     = ' ' * (cchIndent + self.cchIndent);
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        return (sIndent + self.asParams[0] + '\n').replace('\n', sAnnotation);
2001
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'function(arg, ...);' followed by the annotation
        comment.
        """
        sIndent     = ' ' * (cchIndent + self.cchIndent);
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCall       = self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');';
        return sIndent + sCall + sAnnotation;
2021
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is stored as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' or '// C++ normal' annotation.
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the 'if' with braces on lines of their own and the branches
        indented four more spaces.  The 'else' part is only emitted when the
        else branch is not empty.
        """
        cchIndent  += self.cchIndent;
        sIndent     = ' ' * cchIndent;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
2044
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    Named 'C++/preproc' and never marked as decode code.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive as-is on a line of its own; the indentation is
        ignored and no annotation comment is added.
        """
        sDirective = self.asParams[0];
        return sDirective + '\n';
2054
2055
## IEM_MC_F_XXX values.
## The keys are the valid flag names.  Each value is a tuple of additional
## flags that McBlock.parseMcBegin also sets when it encounters the key flag.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
## The keys are the valid flag names.  Each value is a tuple of additional
## flags that McBlock.parseCImplFlags also sets when it encounters the key flag.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2099class McBlock(object):
2100 """
2101 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2102 """
2103
2104 ## @name Macro expansion types.
2105 ## @{
2106 kiMacroExp_None = 0;
2107 kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
2108 kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
2109 ## @}
2110
2111 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
2112 oInstruction = None, cchIndent = None, fDeferToCImpl = False):
2113 ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
2114 self.fDeferToCImpl = fDeferToCImpl;
2115 ## The source file containing the block.
2116 self.sSrcFile = sSrcFile;
2117 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
2118 self.iBeginLine = iBeginLine;
2119 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
2120 self.offBeginLine = offBeginLine;
2121 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2122 self.iEndLine = -1;
2123 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2124 self.offEndLine = 0;
2125 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2126 self.offAfterEnd = 0;
2127 ## The function the block resides in.
2128 self.oFunction = oFunction;
2129 ## The name of the function the block resides in. DEPRECATED.
2130 self.sFunction = oFunction.sName;
2131 ## The block number within the function.
2132 self.iInFunction = iInFunction;
2133 ## The instruction this block is associated with - can be None.
2134 self.oInstruction = oInstruction # type: Instruction
2135 ## Indentation level of the block.
2136 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2137 ## The raw lines the block is made up of.
2138 self.asLines = [] # type: List[str]
2139 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2140 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2141 self.iMacroExp = self.kiMacroExp_None;
2142 ## IEM_MC_BEGIN: Argument count.
2143 self.cArgs = -1;
2144 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2145 self.aoArgs = [] # type: List[McStmtArg]
2146 ## IEM_MC_BEGIN: Locals count.
2147 self.cLocals = -1;
2148 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2149 self.aoLocals = [] # type: List[McStmtVar]
2150 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2151 self.dsMcFlags = {} # type: Dict[str, bool]
2152 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2153 self.dsCImplFlags = {} # type: Dict[str, bool]
2154 ## Decoded statements in the block.
2155 self.aoStmts = [] # type: List[McStmt]
2156
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording where it ends and the raw
    lines it is made up of.

    Must only be called once per block (asserted).
    """
    assert self.iEndLine == -1;
    (self.iEndLine, self.offEndLine, self.offAfterEnd) = (iEndLine, offEndLine, offAfterEnd);
    self.asLines = asLines;
2166
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is the block's begin line plus the number of newlines
    preceding off; the reported column is one-based.
    """
    cLinesBefore = sRawCode.count('\n', 0, off);
    offLineStart = sRawCode.rfind('\n', 0, off) + 1;    # rfind returns -1 if on the first line, giving 0.
    iColumn      = off - offLineStart + 1;
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cLinesBefore, iColumn, sMessage,));
2173
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException) for statement sName,
    reporting the block's source file and begin line.
    """
    sError = '%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,);
    raise ParserException(sError);
2177
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks that statement sName has exactly cParamsExpected parameters.

    Returns True if it does, otherwise a ParserException is raised.
    """
    cParams = len(asParams);
    if cParams == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParams,));
2184
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """ Generic parser: wraps the name and parameters in a plain McStmt object. """
    _ = oSelf;  # Not needed here; taken so all statement parsers have the same signature.
    oStmt = McStmt(sName, asParams);
    return oStmt;
2190
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """ Generic parser: wraps the name and parameters in a plain McStmtCond object. """
    _ = oSelf;  # Not needed here; taken so all statement parsers have the same signature.
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2196
## The RT_ARCH_VAL_XXX names accepted as IEM_MC_NATIVE_IF architectures by
## parseMcNativeIf.  Only membership is checked there, the values are not used.
kdArchVals = {
    'RT_ARCH_VAL_X86': True,
    'RT_ARCH_VAL_AMD64': True,
    'RT_ARCH_VAL_ARM32': True,
    'RT_ARCH_VAL_ARM64': True,
    'RT_ARCH_VAL_SPARC32': True,
    'RT_ARCH_VAL_SPARC64': True,
};
2205
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a list of
    RT_ARCH_VAL_XXX values separated by '|'.  Unknown architectures are
    reported via raiseStmtError.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sArchList = asParams[0];
    asArchitectures = [];
    if sArchList.strip() != '0':
        for sArch in sArchList.split('|'):
            asArchitectures.append(sArch.strip());
        for sArch in asArchitectures:
            if sArch not in oSelf.kdArchVals:
                oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
    return McStmtNativeIf(sName, asArchitectures);
2218
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The parameters are: argument count, locals count, IEM_MC_F_XXX flags and
    IEM_CIMPL_F_XXX flags.  The counts and flags are recorded in the block;
    a flag also sets the flags listed for it in g_kdMcFlags.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;

    # The block must still be in its initial state.
    fPristine = oSelf.cArgs == -1 and oSelf.cLocals == -1 and not oSelf.dsMcFlags;
    if not fPristine:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    oSelf.cArgs   = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    if sMcFlags != '0':
        for sFlag in sMcFlags.split('|'):
            sFlag = sFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            for sImpliedFlag in g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sImpliedFlag] = True;

    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2241
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """ IEM_MC_ARG - parameters: type, name, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2249
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sConstValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sConstValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2257
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocal, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocal, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2265
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

    Note! This one is split up into an IEM_MC_LOCAL statement (the uint32_t
          local) and an IEM_MC_ARG_LOCAL_REF statement (the uint32_t pointer
          argument referencing it).  Both are returned, as a tuple.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2277
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS - no parameters.

    Note! Translated to an IEM_MC_ARG_CONST statement declaring pXState as
          argument 0.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    sType  = 'PX86XSAVEAREA';
    sValue = '&pVCpu->cpum.GstCtx.XState';
    oStmt  = McStmtArg('IEM_MC_ARG_CONST', [sType, 'pXState', sValue, '0'], sType, 'pXState', 0, sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2287
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """ IEM_MC_LOCAL - parameters: type, name. """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2295
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_ASSIGN - parameters: type, name, assigned value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2303
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2311
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_EFLAGS - parameter: name.  The local is recorded with type uint32_t. """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2319
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The last character of the statement name gives the call argument count.
    Parameter 0 receives the return code and parameter 1 is the function.
    """
    cExpected = 2 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, iFnParam = 1, iRcNameParam = 0);
2326
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The last character of the statement name gives the call argument count.
    Parameter 0 is the function.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2333
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The last character of the statement name gives the call argument count.
    Parameter 0 is the function.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2340
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The last character of the statement name gives the call argument count.
    Parameter 0 is the function.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2347
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The last character of the statement name gives the call argument count.
    Parameter 0 is the function.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2354
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The last character of the statement name gives the call argument count.
    Parameter 0 is the function.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2361
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl to
    validate and merge a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sName is the statement name used in error messages.  sFlags is the flag
    expression: '0' or flags separated by '|', optionally with comments and
    one level of surrounding parentheses.  Each flag also sets the flags
    listed for it in g_kdCImplFlags.

    Returns None.  Raises ParserException (via raiseStmtError) on unknown
    or empty flags.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Note! startswith/endswith rather than indexing, so an empty flag
            #       (stray '|' or '()') ends up as an 'Unknown flag' parser
            #       error with source location rather than an IndexError.
            if sFlag.startswith('('):
                sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):
                sFlag = sFlag[:-1].strip();
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2383
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The last character of the statement name gives the call argument count.
    Parameter 0 holds the IEM_CIMPL_F_XXX flags and parameter 2 is the function.
    """
    cExpected = 3 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2391
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The call argument count is the digit in front of the '_RET' suffix.
    Parameter 0 holds the IEM_CIMPL_F_XXX flags and parameter 2 is the function.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    cExpected = 3 + int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2401
@staticmethod
def stripComments(sCode):
    """
    Returns sCode with C and C++ comments removed.

    A C++ comment is removed up to, but not including, the newline.  A C
    comment is replaced by a single space unless there already is whitespace
    on either side of it.  An unterminated comment drops the rest of the
    code and any whitespace in front of it.

    Note! String literals are not taken into account.
    """
    offCur = 0;
    while offCur < len(sCode):
        offCur = sCode.find('/', offCur);
        if offCur < 0 or offCur + 1 >= len(sCode):
            break;
        chNext = sCode[offCur + 1];

        if chNext == '/':
            # C++ comment: Runs to the end of the line.
            offEol = sCode.find('\n', offCur + 2);
            if offEol < 0:
                return sCode[:offCur].rstrip();
            sCode   = sCode[:offCur] + sCode[offEol:];
            offCur += 1;    # Step over the newline.

        elif chNext == '*':
            # C comment: Runs to the closing '*/'.
            offClose = sCode.find('*/', offCur + 2);
            if offClose < 0:
                return sCode[:offCur].rstrip();
            offAfter     = offClose + 2;
            fSpaceBefore = offCur > 0 and sCode[offCur - 1].isspace();
            fSpaceAfter  = offAfter < len(sCode) and sCode[offAfter].isspace();
            sSep         = '' if fSpaceBefore or fSpaceAfter else ' ';
            sCode   = sCode[:offCur] + sSep + sCode[offAfter:];
            offCur += len(sSep);

        else:
            # Just a slash, not a comment.
            offCur += 1;
    return sCode;
2434
@staticmethod
def extractParam(sCode, offParam):
    """
    Extracts the parameter value starting at offParam in sCode.

    The parameter ends at the first ',' or ')' which is not inside nested
    parentheses, or at the end of sCode if there is no such character.

    Returns the stripped value and the offset of the terminating ',' or ')'
    (len(sCode) if not terminated).
    """
    cDepth = 0;
    offCur = offParam;
    while offCur < len(sCode):
        ch = sCode[offCur];
        if ch == '(':
            cDepth += 1;
        elif cDepth == 0:
            if ch in (',', ')'):
                break;
        elif ch == ')':
            cDepth -= 1;
        offCur += 1;
    return (sCode[offParam : offCur].strip(), offCur);
2456
2457 @staticmethod
2458 def extractParams(sCode, offOpenParen):
2459 """
2460 Parses a parameter list.
2461 Returns the list of parameter values and the offset of the closing parentheses.
2462 Returns (None, len(sCode)) on if no closing parentheses was found.
2463 """
2464 assert sCode[offOpenParen] == '(';
2465 asParams = [];
2466 off = offOpenParen + 1;
2467 while off < len(sCode):
2468 ch = sCode[off];
2469 if ch.isspace():
2470 off += 1;
2471 elif ch != ')':
2472 (sParam, off) = McBlock.extractParam(sCode, off);
2473 asParams.append(sParam);
2474 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2475 if sCode[off] == ',':
2476 off += 1;
2477 else:
2478 return (asParams, off);
2479 return (None, off);
2480
@staticmethod
def findClosingBraces(sCode, off, offStop):
    """
    Finds the '}' matching the '{' at off in sCode, not looking at or beyond
    offStop.

    Returns offset of the matching '}' on success, otherwise -1.

    Note! Does not take comments into account.
    """
    cOpen   = 1;
    offScan = off + 1;
    while offScan < offStop:
        offClose = sCode.find('}', offScan, offStop);
        if offClose < 0:
            break;
        # Account for the braces opened before this closing one, then close one level.
        cOpen += sCode.count('{', offScan, offClose) - 1;
        if cOpen == 0:
            return offClose;
        offScan = offClose + 1;
    return -1;
2501
@staticmethod
def countSpacesAt(sCode, off, offStop):
    """
    Returns the number of consecutive whitespace characters starting at off
    in sCode, not looking at or beyond offStop.
    """
    cSpaces = 0;
    while off + cSpaces < offStop and sCode[off + cSpaces].isspace():
        cSpaces += 1;
    return cSpaces;
2509
2510 @staticmethod
2511 def skipSpacesAt(sCode, off, offStop):
2512 """ Returns first offset at or after off for a non-space character. """
2513 return off + McBlock.countSpacesAt(sCode, off, offStop);
2514
@staticmethod
def isSubstrAt(sStr, off, sSubStr):
    """ Returns True if sSubStr is found at offset off in sStr, otherwise False. """
    offEnd = off + len(sSubStr);
    return sStr[off : offEnd] == sSubStr;
2519
## Matches the start of a C/C++ control statement: 'if (', 'else', 'while (',
## 'for (' and 'do'.
koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches an 'iem.s.<member>' reference to one of the listed members.
## NOTE(review): going by the name these are the decoder state members - confirm.
koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                               + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                               + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                               + r')');

## The conditional statement families, one tuple per family: the IF statement
## name (prefix), the ELSE statement name and the ENDIF statement name.
## decodeCode matches the first element of each entry against the start of a
## statement to tell conditionals apart from plain statements.
kaasConditions = (
    ( 'IEM_MC_IF_', 'IEM_MC_ELSE', 'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop].

    A negative offStop means the end of sRawCode.  iLevel is the nesting
    depth; it is incremented by one for each recursive call made to decode an
    if/else branch, and MC conditionals are rejected once it exceeds one.

    Note! The searches for the ';' terminating MC and C/C++ statements are
          not limited by offStop.

    Returns list of McStmt instances.
    Raises ParserException on failure.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            # Slice rather than index, so that a '/' at the very end of the
            # string ends up as a decode error instead of an IndexError.
            chNext = sRawCode[off + 1 : off + 2];
            if chNext == '/': # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break;
                off += 1;
            elif chNext == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break;
                off += 2;
            else:
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
            # Extract it and strip comments from it.
            if   self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else:                                                           iCond = -1;
            if iCond < 0:
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # offEnd is an exclusive end offset from here on, so only the
                # spaces in front of the '{' are trimmed.  Stepping back a
                # character unconditionally would cut off the ')' whenever
                # the '{' follows it without any whitespace in between.
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler.  (Entry 0 of the table tuple is the parser function.)
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0];
            oStmt = fnParser(self, sName, asParams);
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            #   IEM_MC_IF_XXX() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ELSE() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement.  if and else requires special handling.
            # Note! offEnd is an inclusive end offset in this part of the code.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                offEnd -= 1;
                # Trim trailing spaces.  Since offEnd is inclusive here (see
                # the offEnd + 1 slicing below), it is the character at
                # offEnd that must be tested; testing the one before it
                # would turn '#if 0' into '#if '.
                while offEnd > off and sRawCode[offEnd].isspace():
                    offEnd -= 1;
            else:
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding.  This is a statement filter
            # criteria when generating the threaded functions blocks.
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                          or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                          or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                          or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                          or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                          or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                          );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1;

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1;
                else:
                    # offBlock1End becomes the offset of the ';' itself.  The
                    # recursive call below still picks the statement up since
                    # its search for the ';' isn't limited by offStop.
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1;
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2773
def decode(self):
    """
    Returns the decoded statement list for the block.

    The list is taken from self.aoStmts when that is non-empty, otherwise the
    lines in self.asLines are joined and decoded and the result is stored in
    self.aoStmts first.
    Raises ParserException on failure.
    """
    aoCached = self.aoStmts;
    if aoCached:
        return aoCached;
    sRawCode = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sRawCode);
    return self.aoStmts;
2783
2784
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that no statement preceding the first IEM_MC_CALC_RM_EFF_ADDR in
    aoStmts has a parameter referencing pVCpu->iem.s.iEffSeg, i.e. that
    iEffSeg isn't used before the effective address has been decoded.
    Returns None on success, error string on failure.

    Only the statements directly in aoStmts are examined, conditional
    branches are not entered (ASSUMING these will not occur before the
    address calculation).

    See r158454 for an example of this issue.
    """
    # Locate the first IEM_MC_CALC_RM_EFF_ADDR statement (-1 if there is none).
    idxCalc = next((idx for idx, oCur in enumerate(aoStmts) if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), -1);

    # Walk backwards from it looking for IEMCPU::iEffSeg references.
    for idxPrev in range(idxCalc - 1, -1, -1):
        if any('pVCpu->iem.s.iEffSeg' in sParam for sParam in aoStmts[idxPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2806
## Extracts the first word of a C/C++ statement (group 1).  Leading spaces are
#  skipped and the word must be followed by a space, '(' or ';'.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');
## First words of decoding related C/C++ statements that checkForDoneDecoding
#  tolerates after the IEMOP_HLP_DONE_*DECODING* invocation.  Only the keys
#  are looked at.
kdDecodeCppStmtOkayAfterDone = {
    'IEMOP_HLP_IN_VMX_OPERATION': True,
    'IEMOP_HLP_VMX_INSTR': True,
};
2812
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    macro invocation, that no decoding takes place after it and that no
    state modifying MC statement precedes it.
    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).

    A IEM_MC_DEFER_TO_CIMPL_XXX statement counts as an implicit invocation
    when it is the very first statement.
    """
    sFmtDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so we
    # don't need to look.
    cDoneMarkers = 0;
    for idxStmt, oCurStmt in enumerate(aoStmts):
        #
        # MC statements.
        #
        if not oCurStmt.isCppStmt():
            sStmtNm = oCurStmt.sName;
            if idxStmt == 0 and sStmtNm.startswith('IEM_MC_DEFER_TO_CIMPL_'): # implicit
                cDoneMarkers += 1;
            elif cDoneMarkers == 0:
                # Entry 1 of the table tuple: modifies state, not allowed before the done-decoding.
                if g_dMcStmtParsers.get(sStmtNm, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            elif sStmtNm == 'IEM_MC_CALC_RM_EFF_ADDR':
                return sFmtDecodeAfterDone % (idxStmt + 1,);
            continue;

        #
        # C/C++ statements - classified by their first word.
        #
        oFirstWord = self.koReCppFirstWord.match(oCurStmt.asParams[0]);
        if not oFirstWord:
            continue;
        sWord = oFirstWord.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDoneMarkers += 1;
        elif cDoneMarkers > 0 and oCurStmt.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return sFmtDecodeAfterDone % (idxStmt + 1,);

    if cDoneMarkers == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneMarkers == 1:
        return None;
    return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
2854
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.
    Returns None on success, error string on failure.

    aoStmts is the statement list to check.  asRegRefClasses is a dictionary
    keyed by the register class names referenced so far; it is updated in
    place and shared with the recursive calls checking the branches of
    conditional statements, so the top-level caller passes an empty one.

    The register class is the name component following '_REF_' / '_FETCH_' in
    the MC statement name (e.g. 'GREG').  Memory fetches (class starting with
    'MEM') are not checked.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    # Here the '_REF_' is followed by the value type rather than the register class.
                    sClass = 'FPUREG';
                else:
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # Nothing follows the class, so it is the whole remainder of the
                        # name.  (Must be a slice: indexing would yield only the first
                        # character, e.g. 'E' for IEM_MC_REF_EFLAGS, which never matches
                        # the class extracted from a FETCH statement.)
                        sClass = oStmt.sName[offRef + 5 :];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = oStmt.sName[offFetch + 7 : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2910
def check(self):
    """
    Runs the sanity checks on the decoded block: too early iEffSeg use,
    done-decoding placement and fetch-after-reference ordering (in that
    order).
    Returns list of error strings, empty if all is fine.
    """
    aoStmts = self.decode();
    # All three checks are always run; each yields None when it has nothing to report.
    asResults = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    return [sError for sError in asResults if sError];
2932
2933
## Temporary flag for enabling / disabling experimental MCs depending on the
## SIMD register allocator.
## It is used in place of a fixed True/False as the native recompiler support
## indicator for a number of SIMD related entries in g_dMcStmtParsers.
g_fNativeSimd = True;
2937
2938## IEM_MC_XXX -> parser + info dictionary.
2939#
2940# The info columns:
#   - col 1+0: boolean entry indicating whether the statement modifies state and
#              must not be used before IEMOP_HLP_DONE_*.
#   - col 1+1: boolean entry similar to the previous column, but used to decide
#              when to emit calls for conditional jumps (Jmp/NoJmp).
#              The difference is that most IEM_MC_IF_XXX entries are False here.
2946# - col 1+2: boolean entry indicating native recompiler support.
2947#
2948# The raw table was generated via the following command
2949# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2950# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2951# pylint: disable=line-too-long
2952g_dMcStmtParsers = {
2953 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2954 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2955 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2956 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2957 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2958 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2959 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2960 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2961 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2962 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2963 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2964 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2965 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2966 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2967 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2968 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2969 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2970 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2971 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2972 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2973 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2974 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2975 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2976 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2977 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2978 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2979 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2980 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2981 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2982 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2983 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2984 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2985 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2986 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2987 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2988 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2989 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2990 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2991 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2992 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2993 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2994 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2995 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2996 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2997 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2998 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2999 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
3000 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
3001 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
3002 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
3003 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
3004 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
3005 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
3006 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
3007 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
3008 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
3009 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
3010 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
3011 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3012 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3013 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3014 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
3015 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
3016 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
3017 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
3018 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3019 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3020 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3021 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3022 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3023 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3024 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
3025 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3026 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3027 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3028 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
3029 'IEM_MC_COMMIT_EFLAGS_OPT': (McBlock.parseMcGeneric, True, True, True, ),
3030 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3031 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3032 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3033 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3034 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3035 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3036 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3037 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3038 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3039 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3040 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3041 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3042 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3043 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3044 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3045 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3046 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3047 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3048 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3049 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3050 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3051 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3052 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3053 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3054 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3055 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3056 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3057 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3058 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3059 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3060 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3061 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3062 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3063 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3064 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3065 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3066 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3067 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3068 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3069 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3070 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3071 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3072 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3073 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3074 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3075 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3076 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3077 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3078 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3079 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3080 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3081 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3082 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3083 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3084 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3085 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3086 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3087 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3088 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3089 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3090 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3091 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3092 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3093 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3094 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3095 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3096 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3097 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3098 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3099 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3100 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3101 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3102 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3103 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3104 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3106 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3107 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3108 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3109 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3110 'IEM_MC_FETCH_MREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3111 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3112 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3113 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3114 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3115 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3116 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3117 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3118 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3119 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3120 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3121 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3122 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3123 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3124 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3125 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3126 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3127 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3128 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3129 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3130 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3131 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3132 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3133 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3134 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3135 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3136 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3137 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3138 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3139 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3140 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3141 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3142 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3143 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3144 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3145 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3146 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3147 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3148 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3149 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3150 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3151 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3152 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3153 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3154 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3155 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3156 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3157 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3158 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3159 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3160 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3161 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3162 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3163 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3164 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3165 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3166 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3167 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3168 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3169 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, True, ),
3170 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, g_fNativeSimd),
3171 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3172 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3173 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3174 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3175 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3176 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3177 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3178 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3179 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3180 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3181 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3182 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3183 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3184 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3188 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3189 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3190 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3191 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3193 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3196 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3197 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3198 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3199 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3200 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3201 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3202 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3203 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3204 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3205 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3206 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3214 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3215 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3216 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3217 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3218 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3219 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3220 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3221 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3222 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3223 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3224 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3225 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3226 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3227 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3228 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3229 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3230 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3231 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3232 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3233 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3234 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3235 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3236 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3237 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3238 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3239 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3240 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3241 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3242 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3243 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3244 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3245 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3246 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3247 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3248 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3249 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3250 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3251 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3252 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3253 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3254 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3255 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3256 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3257 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3258 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3259 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3260 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3261 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3262 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3263 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3264 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3265 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3266 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, True, ),
3267 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3268 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3269 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3270 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3271 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3272 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3273 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3274 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3275 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3276 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3277 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3278 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3279 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3280 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3281 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3282 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3283 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3284 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3285 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3286 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3287 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3288 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3289 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3290 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3291 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3292 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3293 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3294 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3295 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3296 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3297 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3298 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3299 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3300 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3301 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3302 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3303 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3304 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3305 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3311 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3312 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3313 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3314 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3315 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3316 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3317 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3318 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3319 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3320 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3321 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3322 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3323 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3324 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3325 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3326 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3327 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3328 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3329 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3331 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3332 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3334 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3335 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3336 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3337 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3338 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3339 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3340 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3341 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3343 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3344 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3345 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3346 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3347 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3348 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3349 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3350 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3351 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3352 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3353 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3354 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3355 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3356 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3357 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3358 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3359 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3360 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3361 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3362 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3363 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3364 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3365 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3366 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3367 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3368 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3369 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3370 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3371 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3372 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3373 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3374 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3375 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3376 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3377 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3378 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3379 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3380 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3381 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3382 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3383 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3384 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3385 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3386 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3387 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3388};
3389# pylint: enable=line-too-long
3390
## List of microcode blocks.
# Module-level list that starts out empty; per the type comment the entries
# are McBlock instances.
g_aoMcBlocks = [] # type: List[McBlock]
3393
3394
3395
class ParserException(Exception):
    """
    Parser exception.

    Carries a single message string, which is handed on unchanged to the
    Exception base class.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3400
3401
3402class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3403 """
3404 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3405 """
3406
3407 ## @name Parser state.
3408 ## @{
3409 kiCode = 0;
3410 kiCommentMulti = 1;
3411 ## @}
3412
class Macro(object):
    """
    Macro definition: a name, an optional parameter list and a body text.

    expandMacro() performs textual parameter substitution on the body,
    including parameters that are glued to neighbouring text with the '##'
    pasting operator.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body text that expandMacro() works on.
        self.iLine  = iLine;    ##< Line number supplied by the creator; only stored here.
        ## Matches any parameter name in the body.  Group 1 and 3 capture an
        #  adjacent '##' operator (with surrounding blanks) or are empty when
        #  there is just a word boundary.  None if there are no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): since '(' is never whitespace this is effectively just
        # ch.isspace().  The intent may have been 'not ch.isspace()'; confirm
        # before changing, as it alters the whitespace of the expanded text.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is not used.  asArgs holds the argument values in parameter
        order: it must be None/empty for a macro without parameters and must
        have exactly one value per parameter otherwise (both are asserted).

        Returns the expanded body as a string.
        """
        _ = oParent;
        sBody = self.sBody;

        if not self.oReArgMatch:
            assert not asArgs;
            return sBody;

        assert len(asArgs) == len(self.asArgs);
        #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
        dArgs = dict(zip(self.asArgs, asArgs));

        # All matching is done against the unmodified body.  Searching the
        # partially expanded text instead would evaluate the word boundary in
        # front of the next parameter against the tail of the value that was
        # just inserted, so in 'a ## b' the 'b' would be left unexpanded
        # whenever the value of 'a' ends with a word character.  This also
        # guarantees that inserted values are never rescanned.
        asPieces = [];
        offPrev  = 0;
        for oMatch in self.oReArgMatch.finditer(sBody):
            #oParent.debug('%s %s..%s (%s)' % (oMatch.group(2), oMatch.start(), oMatch.end(), oMatch.group()));
            sValue = dArgs[oMatch.group(2)];

            # NOTE(review): this looks at the first character of the match
            # rather than the character in front of it, so for identifier-like
            # parameter names sPre stays empty.  Kept as-is; confirm intent.
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';

            asPieces.append(sBody[offPrev : oMatch.start()]);
            asPieces.append(sPre + sValue + sPost);
            offPrev = oMatch.end();
        asPieces.append(sBody[offPrev : ]);

        return ''.join(asPieces);
3454
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    An instance holds the directive type and its expression.  The constructor
    validates the expression, so only the restricted forms understood by
    checkExpression() and checkSupportedDefine() can be instantiated.  The
    remaining methods decide whether code inside the conditional is included
    for a given build architecture.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends on script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the build architecture name to the matching RT_ARCH_XXXX define.
    # NOTE(review): 'sparc32' mapping to RT_ARCH_SPARC64 looks inconsistent,
    # and there are no entries for x86/arm32 - confirm before relying on it.
    kdBuildArchToIprt = {
        'amd64':   'RT_ARCH_AMD64',
        'arm64':   'RT_ARCH_ARM64',
        'sparc32': 'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive name; 'if' and 'elif' take an expression, for
        any other type sExpr is treated as a single define name.  Raises
        Exception if the expression or define is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Not set by this class; presumably flipped by the parser on #else.
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr)

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """
        Checks that sDefine is one that we support, i.e. it is listed in
        kdKnownDefines or is a VMM_INCLUDED_xxx_h header guard.

        Returns True.  Raises Exception if unsupported.
        """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in cls.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported.

        Accepted are the literals '0' and '1', and defined(xxxx) terms joined
        by '||' / '&&' with optional '!' and parentheses.  A literal '1' is
        also accepted as the very last term.

        Returns True.  Raises Exception if not supported.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # Trailing literal: there is no match object to advance
                    # by, so consume the rest of the expression explicitly.
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True/False.  Raises Exception for anything but the simple
        forms mentioned below (so no '!', '&&' or parentheses here).
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):     # Must test at the current offset, not the string start.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so that a trailing literal 1 is recognized above.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch has no entry in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not listed in kdKnownDefines are treated as undefined.
        Raises Exception for defines marked -2 in kdKnownDefines.
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        aoCppCondStack is the stack of open conditionals; every level must
        currently be in a branch that applies to sArch.  iLine is unused
        (debugging aid only).
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary branch applies, so any later branch does not.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # Only fine if the matching #elif is the branch we are currently in.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3645
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile            Source file name; used as prefix in error messages.
    asLines             The lines of the source file.
    sDefaultMap         Name of the default instruction map.  Must be a key
                        in g_dInstructionMaps (asserted).
    sHostArch           Host architecture value, kept in self.sHostArch.
    oInheritMacrosFrom  Optional parser instance to inherit macros from: its
                        dMacros dictionary is shallow-copied and its oReMacros
                        regular expression is shared.
    """
    assert sDefaultMap in g_dInstructionMaps;

    # Input and current position / state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.sHostArch      = sHostArch;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []   # type: List[Instruction]
    self.oCurFunction   = None # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None # type: McBlock
    self.aoCppCondStack = []   # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.

    # Macros, either inherited or starting out empty.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    else:
        self.dMacros    = {}   # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Regular expressions.  Macro names, mnemonics and stats names all use the
    # same plain C identifier pattern.
    oReIdentifier       = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMacroName   = oReIdentifier;
    self.oReMnemonic    = oReIdentifier;
    self.oReStatsName   = oReIdentifier;
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\(');    ##> Not DEFER_TO_CIMPL_0_RET!

    # Debug switches.
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Comment tag -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        # @opfltest: Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        # @opflmodify: Lists all EFLAGS modified.  Includes @opflset, @opflclear and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        # @opflset: Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        # @opflclear: Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        # @opflundef: List of flags documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        # @opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags come in two spellings: @optestN and @optest[N].
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Errors collected by error(), errorOnLine() and errorComment().
    self.asErrors       = [];
3737
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number (self.iLine).
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
3743
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
3749
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3757
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3765
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment, i.e. for line
    self.iCommentLine + iLineInComment, in self.asErrors.
    Always returns False.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3773
def printErrors(self):
    """
    Writes all the collected errors to stderr (nothing is written when there
    are none).
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3782
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3789
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by a
    single space.

    An error is recorded (and the line still returned) if a comment start or
    end marker remains afterwards, as that indicates a multi-line comment.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3800
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, filling in or checking the opcode byte and the
    prefix of the instructions the table entries refer to.

    sLine is the line declaring the table.  The table name is looked up in
    g_dInstructionMapsByIemName, falling back on self.oDefaultMap when it is
    unknown.  Each entry not starting with 'iemOp_Invalid' is looked up in
    g_dAllInstructionsByFunction; the first instruction compatible with the
    opcode + prefix implied by the entry position gets these values filled
    in, and when none is compatible a copy of the first instruction is
    registered for that position.

    Returns True if the closing brace was found and the table had the
    expected length, otherwise False with an error recorded.

    Note! Updates self.iLine as it consumes the whole table.
    """

    #
    # Extract the table name and find the map.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # Tables declared with 256 or 32 entries have one entry per byte.  All
    # other tables use four entries per byte:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    #
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);
    else:
        cEntriesPerByte   = 4;
        cValidTableLength = 1024;
        asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Fetch the next line, dropping comments and surrounding spaces
        # (assumes no multi-line comments).
        sRawLine = self.asLines[self.iLine];
        self.iLine += 1;
        sTableLine = self.stripComments(sRawLine).strip();

        # Split the line up into stripped, non-empty entries.  An IEMOP_X4(x)
        # entry is replaced by x followed by three more copies of x.
        asEntries = [];
        for sPart in sTableLine.split(','):
            sEntry  = sPart.strip();
            cCopies = 0;
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry  = (sEntry[len('IEMOP_X4('):-1]).strip();
                cCopies = 3;
            if sEntry:
                asEntries.append(sEntry);
            asEntries.extend([sEntry,] * cCopies);

        # Process the entries.
        for sEntry in asEntries:
            # End of table?
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;

            # Invalid entries are skipped, but still counted.
            if not sEntry.startswith('iemOp_Invalid'):
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if not aoInstr:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
                else:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];

                    # Find the first instruction that either matches exactly or
                    # has the missing value(s) that we can fill in.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode:
                            if oCurInstr.sPrefix != sPrefix:
                                if oCurInstr.sPrefix is not None:
                                    continue;
                                oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;

                    # None found: register a copy of the first one for this position.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3906
def addInstruction(self, iLine = None):
    """
    Creates a new instruction, adding it to both the global instruction list
    and the list of current instructions.

    iLine is the line number to create it with; defaults to self.iLine.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3915
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    first underscore separated word, in lower case, becomes the mnemonic.
    If the instruction has no operands yet, the remaining words are taken
    as operand types and appended as operands.

    Returns False upon hitting a word that isn't in g_kdOpTypes (no error is
    recorded and operands added before it are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asWords = sStats.split('_');
    oInstr.sMnemonic = asWords[0].lower();
    if len(asWords) > 1 and not oInstr.aoOperands:
        for sType in asWords[1:]:
            if sType not in g_kdOpTypes:
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3931
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by deriving the values that
    weren't specified and registering the instruction.

    oInstr  The instruction; must not have been completed before (asserted).
    iLine   The line number to record as the completion line.

    Derived when missing: mnemonic + operands, disassembler enum, stats name,
    function name, maps (default map) and encoding.  The instruction is then
    added to its maps as well as to the stats and function name indexed
    dictionaries.  A duplicate stats name is recorded as an error.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note! At present there is no processing specific to instructions with
    #       @op tags, nor to unspecified legacy ones like this:
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from the stats name, or failing that the
    # function name, if the former is missing.
    if oInstr.sMnemonic is None:
        sNameSrc = oInstr.sStats;
        if sNameSrc is None and oInstr.sFunction is not None:
            sNameSrc = oInstr.sFunction.replace('iemOp_', '');
        if sNameSrc is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sNameSrc);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or else
    # from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = '_'.join([oInstr.sMnemonic,]
                                     + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # Derive the IEM function name from mnemonic and operand types, or else
    # from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = '_'.join(['iemOp_' + oInstr.sMnemonic,]
                                        + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.  It is left unset if there are
    # operands and the first one doesn't use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                (sVexEnc, sPlainEnc) = ('VEX.ModR/M', 'ModR/M');
            else:
                (sVexEnc, sPlainEnc) = ('VEX.fixed', 'fixed');
            oInstr.sEncoding = sVexEnc if oInstr.onlyInVexMaps() else sPlainEnc;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
4037
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the current comment
    and instruction list.

    iLineInComment  If given, the completion line is self.iCommentLine plus
                    this value, otherwise it is self.iLine.
    fEndOfFunction  When True the current function (if any) is completed too
                    and the per-function MC block counter is reset.

    Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];

    if fEndOfFunction:
        oFunction = self.oCurFunction;
        if oFunction:
            oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
            self.oCurFunction = None;
        self.iMcBlockInFunc = 0;
    return True;
4058
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all the current instructions to oValue.

    Unless fOverwrite is True (the boolean itself), only attributes currently
    being None are replaced.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4070
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue, padding the array with None entries as needed.

    Unless fOverwrite is True (the boolean itself), only entries currently
    being None are replaced.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fForce or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
4082
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line of the comment starts with the word 'Opcode' followed
    by a word with a '0x' or '0X' prefix, everything after 'Opcode' is
    stored (single space separated, '0X' prefixes lowercased to '0x') as the
    sRawOldOpcodes attribute of the current instructions, provided they
    don't have one already.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [ ('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:] ];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
4098
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed, adding one
    (at line self.iCommentLine + iTagLine) if there are no current ones.

    The tag count of every current instruction is incremented, and
    self.cTotalTagged is bumped for each one getting its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
4108
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings, joining the stripped
    lines of each section with a space.  Empty sections are dropped.
    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
4120
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    The stripped lines of each section are joined using sLineSep, and the
    resulting sections are joined using sSectionSep.  Empty sections are
    skipped, so the result neither starts nor ends with a section separator.
    """
    asSections = [sLineSep.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
    return sSectionSep.join(asSections);
4138
4139
4140
4141 ## @name Tag parsers
4142 ## @{
4143
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing dot is added if missing.  Returns True after storing it in the
    sBrief attribute of the instruction, False (with an error recorded) if
    the value is invalid or the instruction already has a brief.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Only dots followed by a space count as ending a sentence, and the only
    # such dot allowed is the final one.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4173
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to the asDescSections list of the instruction.
    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));

    _ = sTag; _ = iEndLine;
    return True;
4188
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success, False (with an error recorded) if the value is
    invalid or the instruction already has a mnemonic.
    """
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sMnemonic = self.flattenAllSections(aasSections);

    # Validate the value and refuse overwriting.
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    _ = iEndLine;
    return True;
4211
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations; when
    omitted, entry [1] of the g_kdOpTypes value for the type is used.

    The 'type' value indicates the operand type and must be a key in
    g_kdOpTypes.

    Returns True on success, False (with an error recorded) if the value is
    malformed or invalid, or if the operand has already been specified.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Operands
    # not yet specified ahead of this one are left as None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4261
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Whitespace around the map names and empty list entries are ignored.
    Returns True on success (duplicates within the value itself are
    recorded as errors without failing), False with an error recorded if no
    map is given, a map name is unknown, or a map is already assigned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4296
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lowercased, and a two character value gets '0x' prepended.
    'n/a' results in None.  Returns True on success, False with an error
    recorded if the value is missing or invalid, or if the instruction
    already has a prefix.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4335
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg  (TODO: mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.
    The 'reg' value is a single digit in the range 0 thru 7.

    Returns True on success, False with an error recorded if the value is
    invalid or the instruction already has an opcode.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # The ModR/M reg field is three bits wide, so only 0..7 are valid.
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4365
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored is entry [0]
    of the corresponding dictionary value.  Returns True on success, False
    with an error recorded if the value is invalid or already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and translate the value.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sRawValue, ', '.join(sorted(g_kdSubOpcodes.keys())),));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to overwrite an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));

    oInstr.sSubOpcode = sSubOpcode;
    _ = iEndLine;
    return True;
4395
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  Accepted values are the keys of
    g_kdEncodings, the keys of g_dInstructionMaps, and anything that
    _isValidOpcodeByte approves of.

    Returns True on success, False with an error recorded if the value is
    invalid or the instruction already has an encoding.
    """
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sEncoding = self.flattenAllSections(aasSections);

    # Validate the value.
    fKnown = sEncoding in g_kdEncodings \
          or sEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to overwrite an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    _ = iEndLine;
    return True;
4422
## Maps each EFLAGS tag to the name of the Instruction attribute holding its flag list.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
4431
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a key
    in g_kdEFlagsMnemonics (surrounding whitespace is tolerated), or just
    'none' for an empty list.  The list is stored in the instruction
    attribute that self.kdOpFlagToAttr gives for the tag.

    Returns True on success, False with error(s) recorded if any flag is
    invalid or the attribute already holds a non-empty list.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # All the flags are checked so that every invalid one gets reported.
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to replace an existing non-empty list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
4463
## EFLAGS class definitions (see @opflclass), each giving the flag list for
## every one of the five EFLAGS attributes of an instruction.
kdEFlagsClasses = {
    'arithmetic': {                 # add, sub, ...
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [], 'asFlSet': [], 'asFlUndefined': [],
    },
    'arithmetic_carry': {           # adc, sbb, ...
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [], 'asFlSet': [], 'asFlUndefined': [],
    },
    'incdec': {
        'asFlTest':         [],
        'asFlModify':       [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear':        [], 'asFlSet': [], 'asFlUndefined': [],
    },
    'division': {                   ## @todo specify intel/amd differences...
        'asFlTest':         [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify':       [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear':        [], 'asFlSet': [],
        'asFlUndefined':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': {                   ## @todo specify intel/amd differences...
        'asFlTest':         [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear':        [], 'asFlSet': [],                      # between IMUL and MUL.
        'asFlUndefined':    [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': {                    # and, or, xor, ...
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', ],
    },
    'rotate_1': {                   # rol and ror with fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [], 'asFlSet': [], 'asFlUndefined': [],
    },
    'rotate_count': {               # rol and ror w/o fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [], 'asFlSet': [],
        'asFlUndefined':    [ 'of', ],
    },
    'rotate_carry_1': {             # rcl and rcr with fixed 1 shift count
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [], 'asFlSet': [], 'asFlUndefined': [],
    },
    'rotate_carry_count': {         # rcl and rcr w/o fixed 1 shift count
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [], 'asFlSet': [],
        'asFlUndefined':    [ 'of', ],
    },
    'shift_1': {                    # shl, shr or sar with fixed 1 count.
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [], 'asFlSet': [],
        'asFlUndefined':    [ 'af', ],
    },
    'shift_count': {                # shl, shr or sar w/o fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [], 'asFlSet': [],
        'asFlUndefined':    [ 'af', 'of', ],
    },
    'bitmap': {                     # bt, btc, btr, bts
        'asFlTest':         [],
        'asFlModify':       [ 'cf', ],
        'asFlClear':        [], 'asFlSet': [],
        'asFlUndefined':    [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest':         [],
        'asFlModify':       [],
        'asFlClear':        [], 'asFlSet': [], 'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Shorthand for setting all the EFLAGS attributes of the instruction from
    the self.kdEFlagsClasses entry named by the value.

    Returns True on success.  Returns False with an error recorded if the
    class is unknown or if any of the attributes is already set (not None),
    in which case none of the attributes are modified.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s  (valid: %s)'
                                           % (sTag, sClass, ', '.join(sorted(self.kdEFlagsClasses.keys())),));

    # Check that we won't overwrite anything before modifying the instruction.
    for sAttrib, asFlags in kdAttribs.items():
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));

    # Set the attributes.  Each instruction gets its own copy of the lists so
    # that the class definitions cannot be modified via an instruction.
    for sAttrib, asFlags in kdAttribs.items():
        setattr(oInstr, sAttrib, list(asFlags));

    _ = iEndLine;
    return True;
4590
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    A single value of 'none' (any letter case) means an empty hint list.
    Each remaining value must be a key of g_kdHints; if any value is invalid
    nothing is added to the instruction.

    Returns True on success, False when a value failed validation.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Store the normalized value back into the list.  (The old
                    # code indexed the string itself, which raises TypeError
                    # because str does not support item assignment.)
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them, reporting (but tolerating) duplicates.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4624
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    More than one word is reported but the first word is still used; no word
    at all fails.  The value must match self.oReDisEnum and may only be set
    once per instruction.

    Returns True on success, otherwise False / the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Tokenize the value.
    asTokens = self.flattenAllSections(aasSections).split();
    cTokens  = len(asTokens);
    if cTokens != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,));
        if cTokens == 0:
            return False;

    # Validate the (first) token.
    sEnumName = asTokens[0];
    if self.oReDisEnum.match(sEnumName) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sEnumName, self.oReDisEnum.pattern));

    # Store it unless something was stored before.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sEnumName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sEnumName,));
4653
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  Extra words are reported but
    the first one is still used.  Repeating the tag with the same CPU name is
    accepted; a different name is reported and the earlier value is kept.

    Returns True unless the value is missing or not a known CPU name, in
    which case the errorComment() result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Guard against an empty value; indexing asCpus[0] would raise IndexError.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Validation above must not assign, or the overwrite check
    # below can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4683
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    A single value of 'none' (any letter case) means an empty list.  Each
    remaining value must be a key of g_kdCpuIdFlags; if any value is invalid
    nothing is added to the instruction.

    Returns True on success, False when a value failed validation.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Store the normalized value back into the list.  (The old
                    # code indexed the string itself, which raises TypeError
                    # because str does not support item assignment.)
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them, reporting (but tolerating) duplicates.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4717
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Requires exactly one word matching self.oReGroupName, and the group may
    only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There shall be one and only one group name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));

    sNewGroup = asNames[0];
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Assign it, but never replace an existing group.
    if oInstr.sGroup is None:
        oInstr.sGroup = sNewGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));
4743
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be a single word found in g_kdInvalidStyles and only one
    of the three tags may be given per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One word, which has to be a known style.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    sNewStyle = asWords[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # The style can only be specified once.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally raise a flag on the instruction.
    dFlagByTag = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', };
    sFlagAttrib = dFlagByTag.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    return True;
4781
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  A section that
    fails to parse is reported via errorComment() and not added to the
    instruction.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->'), then selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The state checks must be identity
            # tests: comparing by value treats two different lists with equal
            # content (e.g. both still empty) as the same list and lets a
            # repeated or misplaced separator slip through.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            # Only the first compare operator found in the expression is considered.
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                # Duplicates are reported, but the selector is still added.
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An optional ':type' suffix overrides the field's default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    # Duplicates are reported, but the item is still added.
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if everything parsed okay,
        # otherwise dump what we got for debugging.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4926
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is checked against the count of tests the instruction
    already has; a mismatch is reported, but the test is parsed regardless.

    Returns whatever parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Cut the digits out of the tag: '@optest[' is 8 chars, '@optest' is 7.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cExisting = len(oInstr.aoTests);
    if iTest != cExisting:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cExisting,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4942
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or recorded; the handler just returns True.

    See also @oponlytest.
    """
    # Reference the arguments so they don't show up as unused.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4954
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Invalid and duplicate references are reported and skipped.

    Returns True, or the errorComment() result when no reference is given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The copy jobs are only queued on the instruction here, they are carried
    # out after all the input has been parsed so forward references work.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4983
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already in there.  The tag takes no value.

    Returns True on success, otherwise the errorComment() result.

    See also @optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Complain if we were given any value at all.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction, once.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);

    return True;
5006
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be a single word found in g_kdXcptTypes and can only be
    specified once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One word, which has to be a known exception type.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));
    sNewType = asWords[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Store it unless something was stored before.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
5033
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReFunctionName and can only be set once.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Store it unless something was stored before.
    if oInstr.sFunction is None:
        oInstr.sFunction = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sNewName,));
5061
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReStatsName and can only be set once.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReStatsName.pattern));

    # Store it unless something was stored before.
    if oInstr.sStats is None:
        oInstr.sStats = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sNewName,));
5089
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result when the tag has no value,
    otherwise the errorComment() result.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5102
5103 ## @}
5104
5105
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is split into lines, which are grouped into '@tag' values.
    Each tag value is a list of sections (aasSections), a section being a
    list of consecutive non-empty lines; empty lines separate sections.
    Text preceding the first tag is collected under the pseudo tag
    '@default'.  Tags found in self.dTagHandlers are dispatched to their
    handler as handler(sTag, aasSections, iTagLine, iEndLine).

    Returns False if the comment contains neither 'Opcode' nor '@',
    otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@')      < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Advance self.iCommentLine for each leading empty line dropped.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;               # Number of tags dispatched to a handler.
    sFlatDefault = None;            # Flattened untagged ('@default') text, if any.
    sCurTag      = '@default';      # The tag currently being collected.
    iCurTagLine  = 0;               # Index into asLines where the current tag started.
    asCurSection = [];              # The section currently being filled.
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: non-empty lines extend the
            # current section, an empty line starts a new one (unless the
            # current section is still empty).
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section first.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            # Note: '@opencoding' starts with '@op' and is thus caught by the
            #       'Unknown tag' branch above before getting here.
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The first word is the (lower cased) tag, the rest of the line
            # is the start of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # Note: Unlike in the loop above, no 'Did you mean' hints are given here.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5206
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The invocation starts at offStartInvocation in sInvocation.  If the
    closing parenthesis isn't found in sInvocation, lines are appended from
    self.asLines starting at index self.iLine until it is.  Commas and
    parentheses inside single or double quoted strings are ignored, as are
    commas inside nested parentheses.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).

    Calls self.raiseError() if the opening parenthesis is missing or the
    macro name doesn't match self.oReMacroName.  Running out of lines is
    reported via self.error() and what has been parsed so far is returned.
    """
    # First the name.
    offOpen = sInvocation.find('(', offStartInvocation);
    if offOpen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[offStartInvocation:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;      # Index of the next line to pull in.
    cDepth   = 1;               # Parenthesis nesting depth.
    off      = offOpen + 1;     # Current offset into sInvocation.
    offStart = off;             # Start of the current argument.
    offCurLn = 0;               # Offset in sInvocation where the last appended line starts.
    chQuote  = None;            # The quote character when inside a string.
    while cDepth > 0:
        # Pull in more lines until there is a character at 'off'.  This is a
        # loop rather than a single 'if' so that an empty line doesn't cause
        # an IndexError below.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asRet, off - offCurLn, iLine - self.iLine);
            offCurLn = off;
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a string: skip escaped characters, look for the end quote.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the top level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (asRet, off - offCurLn, iLine - self.iLine);
5263
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if it is followed (ignoring
    whitespace) by an opening parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Use startswith() rather than indexing [0] so a macro name with nothing
    # (or only whitespace) after it is a miss rather than an IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5273
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx() that only hands back the
    first element of its result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[0];
    return asArgs;
5279
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in the given order, stopping at the first
    one findAndParseMacroInvocation() gets a hit for.

    Returns same as findAndParseMacroInvocation.
    """
    # Both generators are lazy, so no further macros are tried after a hit.
    oResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oResults if asRet is not None), None);
5289
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty).
    Problems are reported via self.error() and processing continues.

    Parameters:
        sMacro      - The macro name, used in error messages.
        sStats      - The a_Stats argument (statistics name).
        sAsm        - Not used here.
        sForm       - The a_Form argument, looked up in g_kdIemForms.
        sUpper      - The a_Upper argument (upper cased mnemonic).
        sLower      - The a_Lower argument (lower cased mnemonic).
        sDisHints   - The a_fDisHints argument, '|' separated DISOPTYPE_XXX values or 0.
        sIemHints   - The a_fIemHints argument, '|' separated IEMOPHINT_XXX values or 0.
        asOperands  - The operand type arguments, looked up in g_kdOpTypes.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # An iLineMnemonicMacro value of -1 means no IEMOP_MNEMONIC* macro has
    # been recorded for the instruction yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of a g_kdOpTypes value is the operand location.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry [0] of a g_kdIemForms value is the encoding.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        # Entry [1] of a g_kdIemForms value is the list of operand locations (or None).
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # NOTE(review): In the 'V' clause below, sForm.find('XOP') is
                    #       used for its truth value; str.find returns -1 (true)
                    #       when not found and 0 (false) for a match at the very
                    #       start.  Likewise sForm.find('VEX') > 0 is false for a
                    #       form starting with 'VEX'.  Presumably '>= 0' was
                    #       intended in both places - TODO confirm.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not (sForm.find('VEX') > 0 or sForm.find('XOP')) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        # Entry [2] of a g_kdIemForms value is matched as a substring of the sub-opcode.
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The DISOPTYPE_ prefix is stripped and the rest lower cased to get the g_kdHints key.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    # Same for the IEM hints, just with the IEMOPHINT_ prefix.
    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5429
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Handles the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3 and IEMOP_MNEMONIC4 macros.

    These macros carry neither a stats name nor an assembly string, so both
    are derived from the lower case mnemonic and the operand list before
    handing the work over to workerIemOpMnemonicEx.

    Returns whatever workerIemOpMnemonicEx returns.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        # Stats name: mnemonic_op1_op2...;  assembly string: mnemonic op1,op2...
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5439
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Handles an IEM_MC_BEGIN invocation by opening a new MC block.

    sCode is the code snippet holding the statement, offBeginStatementInCodeStr
    the offset of the statement into that snippet and offBeginStatementInLine
    the offset of the statement into the source line.

    The new block becomes self.oCurMcBlock and is attached to the most recent
    current instruction, if there is one.  It is only added to g_aoMcBlocks
    (and counted in cTotalMcBlocks) when no preprocessor conditional is
    active, no host architecture is set, or the conditional stack is in a
    block for the host architecture.

    Returns True.  Precondition failures are reported via self.raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # We need a function to put the block in and must not nest blocks.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line to the
    # statement.  rfind returns -1 when there is no preceding newline, which
    # makes the (sub-)line start at offset zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;
    #self.debug('cchIndent=%s offLineStart=%s sFunc=%s' % (cchIndent, offLineStart, self.oCurFunction.sName));

    # Create the block and make it the current one.
    oInstr   = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = oInstr,
                       cchIndent = cchIndent);
    self.oCurMcBlock = oMcBlock;

    # Only list the block if the context matches the host architecture.
    try:
        fListIt = (   not self.aoCppCondStack
                   or not self.sHostArch
                   or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine));
        if fListIt:
            g_aoMcBlocks.append(oMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5479
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Cuts a statement block out of a macro expansion result.

    A line produced by macro expansion holds several newline separated
    "sub-lines".  This helper, used by workerIemMcEnd and
    workerIemMcDeferToCImplXRet, keeps the sub-lines from the one containing
    offBegin thru the one containing offEnd.  If the first sub-line does not
    start with sBeginStmt (ignoring blanks), whatever precedes offBegin on it
    is dropped as well.

    Returns a list of lines, each with a newline appended.  Note that text
    ending with a newline therefore yields a final entry holding just '\\n'.
    """
    # Chop off whatever follows the sub-line containing offEnd.
    offEol = sRawLine.find('\n', offEnd);
    sBlock = sRawLine[:offEol + 1] if offEol > 0 else sRawLine;

    # Chop off the sub-lines preceding the one containing offBegin.
    offSol = sBlock.rfind('\n', 0, offBegin) + 1;
    sBlock = sBlock[offSol:];

    # Something other than the begin statement comes first, so start at the statement.
    if not sBlock.strip().startswith(sBeginStmt):
        sBlock = sBlock[offBegin - offSol:];

    asResult = [];
    for sSubLine in sBlock.split('\n'):
        asResult.append(sSubLine + '\n');
    return asResult;
5500
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, trims whatever follows
    the 'IEM_MC_END();' statement on the final line, passes the result to the
    complete() method of self.oCurMcBlock and then clears self.oCurMcBlock.

    offEndStatementInLine is the offset of the IEM_MC_END statement into the
    current line (self.iLine).

    Returns True.  Problems are reported via self.raiseError; this includes a
    truncated statement where the '(', ')' or ';' is missing at the end of the
    final line.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset must be relative to the joined string, so add
                # the length of all the lines preceding the final one.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    cchFinal      = len(sFinal);
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;
    if offFinalEnd < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Match '(', ')' and ';' in turn, each optionally preceded by blanks.  The
    # scanning is bounded by the line length so that a truncated statement is
    # reported via raiseError rather than ending in an IndexError.
    for chExpected in ('(', ')', ';'):
        while offFinalEnd < cchFinal and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= cchFinal or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5579
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Handles an IEM_MC_DEFER_TO_CIMPL_[1-5]_RET invocation.

    The statement makes up an MC block of its own, so the block is created,
    populated with the parsed statement, completed and registered right here.

    sCode is the code snippet holding the statement, offBeginStatementInCodeStr
    the statement offset into the snippet, offBeginStatementInLine the
    statement offset into the source line and cParams the parameter count
    from the macro name.

    Advances self.iLine by the number of additional lines the statement spans.
    Returns True.  Problems are reported via self.raiseError.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # We need a function to put the block in and must not be inside another block.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line to the
    # statement (rfind returning -1 gives a line start offset of zero).
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;
    #self.debug('cchIndent=%s offLineStart=%s sFunc=%s' % (cchIndent, offLineStart, self.oCurFunction.sName));

    # Create the block.
    oInstr   = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = oInstr,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.  The macro name plus three arguments come in
    # addition to the cParams ones.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    cExpectedArgs = cParams + 4;
    if len(asArgs) != cExpectedArgs:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cExpectedArgs, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # Get the source lines.  These MCs are not typically part of macro
    # expansions, but that case is dealt with here too.
    fPlainLines = cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1;
    if fPlainLines:
        asLines = self.asLines[self.iLine - 1 : self.iLine + cLines];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines  = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                           offAfter, sStmt);
        offSemi  = asLines[-1].find(';');
        assert offSemi >= 0;
        asLines[-1] = asLines[-1][:offSemi + 1];

    assert sStmt in asLines[0];
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block and register it.
    offBeginInLastLine = 0 if cLines > 0 else offBeginStatementInCodeStr;
    oMcBlock.complete(self.iLine, offBeginInLastLine, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5647
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    Decoder functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed invocation, where entry 0 is
    the macro name and entry 1 is used as the function name.

    Any function in progress is completed with the lines up to, but not
    including, the current one.  Returns True.
    """
    # Wrap up the previous function, if any.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Start the new one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5663
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is a piece of code (comments excluded by the caller) and offLine is
    the offset of sCode into the current source line; it is only used for
    calculating the IEM_MC_XXX statement offsets.

    The following is checked for, in this order:
      - FNIEMOP_XXX decoder function definitions (returns True when found),
      - FNIEMOP_DEF_1/2 worker function definitions (returns True when found),
      - IEMOP_HLP_DONE_VEX_DECODING_XXX invocations,
      - IEMOP_MNEMONIC, IEMOP_MNEMONIC[0-4]EX and IEMOP_MNEMONIC[0-4],
      - IEM_MC_BEGIN, IEM_MC_END and IEM_MC_DEFER_TO_CIMPL_X_RET statements
        (returns True if sCode contains 'IEM_MC_').

    Returns False in all other cases, also when only VEX decoding helpers or
    mnemonic macros were processed.
    """

    #
    # Scan macro invocations.
    #
    # Note! The test is '> 0', so code starting with '(' is not scanned.
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to associate the function with
            # and record where the function macro was seen (once only).
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # The instructions are done right away when the macro name contains 'STUB'.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants imply the 'vex_l_zero' hint.  Complain if it is
            # missing on an instruction with a mnemonic macro, then add it.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # In the asArgs lists below entry 0 is the macro name and the macro
        # arguments follow from entry 1 on; the indexing reorders them into
        # the worker method parameter order.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # Anything else is handled as DEFER_TO_CIMPL_<n>_RET, with
                    # the single digit following 'DEFER_TO_CIMPL_' being the
                    # parameter count.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5807
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros after self.dMacros has changed.

    The regex is a single group of alternatives, one per macro.  Macros with
    an argument list must be followed by an opening parenthesis (optionally
    preceded by blanks), while the others must end at a word boundary.
    self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + r'\s*\(');
        else:
            asAlternatives.append(sName + r'\b');
    self.oReMacros = re.compile(r'\b(' + r'|'.join(asAlternatives) + ')');
    return True;
5828
def workerPreprocessorDefine(self, sRest):
    """
    Handles a #define directive, sRest being what follows the directive word.

    Line continuations are consumed (advancing self.iLine), after which the
    name, the argument list and the body are split out.  Only macros with a
    body containing IEM_MC_BEGIN or IEM_MC_END are recorded in self.dMacros;
    all other macros are ignored.

    Returns True, except for a definition that cannot be parsed where the
    return value of self.error is returned.
    """
    assert sRest[-1] == '\n';

    #
    # Join up continuation lines, dropping the escape character (and any
    # blanks preceding it) but keeping the newline.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[0:-2].rstrip() + '\n' + sNextLine;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Try the regex for macros with an argument list first, then fall back
    # on the one for simple macros.
    #
    # NOTE(review): an empty argument list ('NAME()') results in asArgs being
    #               None here, i.e. the same as for a simple macro - confirm
    #               that this is intended.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        asArgs   = None;
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Only macros containing the start and/or end of an MC block are of
    # interest.  MC blocks within nested macro expansion are NOT supported,
    # just to avoid lots of extra work, and there is only limited support
    # for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros someone making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    fRelevant = 'IEM_MC_BEGIN' in sBody or 'IEM_MC_END' in sBody;
    if not fRelevant:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the regex used for spotting invocations.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5889
def workerPreprocessorUndef(self, sRest):
    """
    Handles an #undef directive, sRest being what follows the directive word.

    The macro is dropped from self.dMacros if it is one of those we keep
    track of, in which case the macro regex is recreated as well.

    Returns True, or the workerPreprocessorRecreateMacroRegex result.
    """
    # Isolate the name, doing a quick comment strip by cutting at the first slash.
    offSlash = sRest.find('/');
    sName    = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is one of ours.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5908
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles the #if, #ifdef, #ifndef and #elif directives.

    sDirective is the directive word and sRest what follows it.  Line
    continuations are consumed (advancing self.iLine) and comments stripped
    before a PreprocessorConditional is created from the expression.  For
    #elif the conditional is added to the aoElif list of the top stack entry,
    otherwise it is pushed onto self.aoCppCondStack.

    Returns True.  Problems are reported via self.raiseError.
    """
    fIsElif = sDirective == 'elif';

    #
    # An #elif needs a preceding #if that has not yet reached its #else.
    #
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # Join up continuation lines, replacing the escape character and the
    # blanks preceding it with a single space.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[0:-2].rstrip() + ' ' + sNextLine;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    #
    # Create the conditional and stash it.
    #
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if fIsElif:
        self.aoCppCondStack[-1].aoElif.append(oCond);
    else:
        self.aoCppCondStack.append(oCond);

    return True;
5947
def workerPreprocessorElse(self):
    """
    Handles an #else directive by flagging the top entry of the preprocessor
    conditional stack as being in its else part.

    Returns True.  An #else without #if or a second #else is reported via
    self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oCurCond = self.aoCppCondStack[-1];
    if oCurCond.fInElse:
        self.raiseError('Another #else after #else');

    oCurCond.fInElse = True;
    return True;
5959
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive by dropping the top entry of the preprocessor
    conditional stack.

    Returns True.  An #endif without #if is reported via self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5969
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive line.

    The directive word following the hash is extracted and the matching
    worker method invoked.  The define, undef, if, ifdef, ifndef and elif
    workers get the rest of the line, which starts with the last whitespace
    character preceding the directive arguments.

    Returns the worker result, or False if the directive is not one we handle.
    """
    cchLine = len(sLine);

    # Find the hash and skip any blanks following it.
    offCur = sLine.find('#');
    assert offCur >= 0;
    offCur += 1;
    while offCur < cchLine and sLine[offCur].isspace():
        offCur += 1;

    # The directive word runs up to the next whitespace character.
    offWord = offCur;
    while offCur < cchLine and not sLine[offCur].isspace():
        offCur += 1;
    sDirective = sLine[offWord:offCur];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Skip the blanks following it, stopping at the last one before the
    # arguments/whatever so the tail keeps a single leading blank.
    while offCur + 1 < cchLine and sLine[offCur + 1].isspace():
        offCur += 1;
    sTail = sLine[offCur:];

    # Dispatch to the worker.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        fRet = self.workerPreprocessorIfOrElif(sDirective, sTail);
    elif sDirective == 'define':
        fRet = self.workerPreprocessorDefine(sTail);
    elif sDirective == 'undef':
        fRet = self.workerPreprocessorUndef(sTail);
    elif sDirective == 'else':
        fRet = self.workerPreprocessorElse();
    elif sDirective == 'endif':
        fRet = self.workerPreprocessorEndif();
    else:
        if self.fDebugPreproc:
            self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
        fRet = False;
    return fRet;
6009
def expandMacros(self, sLine, oMatch):
    """
    Expands a known macro in the given line.

    Only a single invocation is expanded, namely the one oMatch matched
    (a match object from self.oReMacros).  Invocations with arguments must
    be complete on the line.

    Returns the line with the invocation replaced by the macro expansion.
    Problems are reported via self.raiseError.
    """
    #
    # Work out which macro it is.  The match includes the opening parenthesis
    # (and any blanks before it) for macros taking arguments.
    #
    offMatch = oMatch.start();
    sName    = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro without any parameters.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Macro with parameters.  Collect the arguments by scanning for commas at
    # the top nesting level until the closing parenthesis is reached.
    # ASSUMES the arguments are all on the same line!
    #
    cNesting = 1;
    offArg   = oMatch.end();
    asArgs   = [];
    for offCur in range(oMatch.end(), len(sLine)):
        ch = sLine[offCur];
        if ch == '(':
            cNesting += 1;
        elif ch == ')':
            cNesting -= 1;
            if cNesting == 0:
                asArgs.append(sLine[offArg:offCur].strip());
                break;
        elif ch == ',' and cNesting == 1:
            asArgs.append(sLine[offArg:offCur].strip());
            offArg = offCur + 1;
    else:
        # Ran off the end of the line without finding the closing parenthesis.
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding, offCur being the offset of the closing parenthesis.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
6068
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are processed one by one, starting at
    self.iLine.  Known macros are expanded in place, preprocessor directives
    are dispatched to checkPreprocessorDirective, multi-line comments are
    collected and passed to parseComment, and code is passed to
    checkCodeForMacro.

    Returns number of errors (the printErrors result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine is the one-based number of sLine.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Store the expansion so code extracting block lines from self.asLines sees it.
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                # Taken unless the first slash starts a C++ comment while we are in code.
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                offHit = sLine.find('/*', offEnd);

                            if offHit >= 0:
                                # Code up to the comment start, then switch to comment state.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment = '';
                                self.iCommentLine = self.iLine;
                                self.iState = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                # No (substantial) comment, the rest of the line is code.
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of comment: parse it and switch back to code state.
                                self.sComment += sLine[offLine:offHit];
                                self.iState = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif ( self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # End of file: complete whatever is pending and print a summary.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6174
# Some sanity checking, done once when the module is loaded.
def __sanityCheckEFlagsClasses():
    """
    Asserts that every flag listed in SimpleParser.kdEFlagsClasses is a known
    EFLAGS mnemonic, i.e. present in g_kdEFlagsMnemonics.
    """
    dClasses = SimpleParser.kdEFlagsClasses;
    for sClass in dClasses:
        dLists = dClasses[sClass];
        for sAttrib in dLists:
            for sFlag in dLists[sAttrib]:
                assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);
__sanityCheckEFlagsClasses();
6182
## The parsed content of IEMAllInstCommonBodyMacros.h.
# None until the first __parseFileByName call, which parses the header and
# stores the parser here so it can be passed to the SimpleParser instances
# created for the actual source files.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6185
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too, so that the
    macros from it can be used when processing sSrcFile and the files of any
    later calls.  The header is looked for next to sSrcFile first and next to
    this script second, and the resulting parser is cached in
    g_oParsedCommonBodyMacros.

    Returns a tuple with the parse() result (error count) and the SimpleParser
    instance for sSrcFile.
    Raises Exception if a file cannot be opened or read.  ParserException is
    printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The header is only expected to define macros, so finding
        # instructions, tags, stubs or MC blocks in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The counters are given in the order the message lists them: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6247
6248
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each name in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and g_dAllInstructionsByFunction second;
    the tests of every instruction found are appended to the requesting
    instruction's aoTests.

    Returns the number of errors (unknown or self references), after
    writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or ()):
            # The statistics name takes precedence over the function name.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                    continue;
                asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6276
6277
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is emptied for every instruction
    in g_aoAllInstructions that is not in it.  Always returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6289
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a (filename, default map name, file set number) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
6302
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given instruction source files.

    asFilesAndDefaultMap is a sequence of (filename, default map) pairs.  A
    filename without a directory part that does not exist relative to the
    current directory is looked up in the directory of this script.

    After parsing, the test copying and only-test filtering is applied and
    the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! max() avoids dividing by zero when no instructions were found at all.
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6333
6334
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is found by matching its base name,
    ignoring case, against g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file is not in the table.
    """
    def lookupDefaultMap(sFilename):
        """ Returns the default map for the file, None if not in the table. """
        _, sBaseName = os.path.split(sFilename);
        sBaseName = sBaseName.lower();
        return next((aoEntry[1] for aoEntry in g_aaoAllInstrFilesAndDefaultMapAndSet
                     if aoEntry[0].lower() == sBaseName), None);

    # Pair each file with its default map and let __parseFilesWorker do the job.
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sDefaultMap = lookupDefaultMap(sFilename);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sDefaultMap));

    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6356
6357
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the first two fields: the filename and the default map.
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append(aoInfo[:2]);
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6366
6367
6368#
6369# Generators (may perhaps move later).
6370#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for one instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands, with these columns: format
    string, one parser index per operand slot, the opcode enum, one
    OP_PARM_xxx per operand slot and the DISOPTYPE_xxx flags.

    Returns the entry as one line with the columns padded to fixed offsets.
    """
    if len(oInstr.aoOperands) > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':               ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();  ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    iStart    = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dFixedParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero parser index for each operand.
        asColumns.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding in dFixedParsers:
        asColumns.append(dFixedParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parser indexes not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #   IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
        #   IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #   IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
        #   IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then zero pad the unused slots.
    cDecoderColumns = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in oInstr.aoOperands[cDecoderColumns:]]);
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags.  Only the hints mapping to DISOPTYPE_xxx defines are used.
    #
    asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    sFlags    = ' | '.join([sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')]);
    asColumns.append((sFlags if sFlags else '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its offset,
    # or one space after the previous column if that one ran too long.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6495
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the table should be emitted as a short table, i.e.
    without its leading and trailing None entries.

    Returns (iInstr, cInstructions, fShortTable): the index of the first
    entry to emit, the index following the last entry to emit, and whether
    the short form was chosen.  For the full table this is (0, len, False).
    """
    _ = oMap; # Use this for overriding.

    cEntries = len(aoTableOrdered);

    # Find the end of the used range (index after the last non-None entry)...
    iEnd = next((idx + 1 for idx in range(cEntries - 1, -1, -1) if aoTableOrdered[idx] is not None), 0);
    # ... and the start of it (index of the first non-None entry).
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # Go for the short version when the trimmed entries make up at least one
    # 30th (integer division) of the table.
    # NOTE(review): the original comment said "more than 30%", which is not
    #               what this test does - confirm which one was intended.
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed < cEntries // 30:
        # Output the full table.
        return (0, cEntries, False);
    return (iFirst, iEnd, True);
6517
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files, then writes one DISOPCODE table and
    one DISOPMAPDESC range record to oDstFile for each map whose name
    starts with "vex".  Maps selected by 'byte+pfx' are first split into
    one map per prefix.

    Returns exit code: 0 on success, 1 if parsing failed (the error and a
    traceback are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;


    #
    # The disassembler uses a slightly different table layout: maps that are
    # selected by opcode byte + prefix are split up into one map per prefix
    # variation (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCur: oCur.sEncoding + ''.join(oCur.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations, but nothing in
    #       this function writes them out yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # Number of table entries in one row of 16 opcode bytes.  Used as a
        # bit mask below, so it is assumed to be a power of two.
        cEntriesPerRow = 0x10 * cEntriesPerByte;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append('const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                # Start of a new row: blank line separator and a row comment.
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries.  One less is skipped
                    # here because of the increment at the end of the loop.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.
                        # Skip cEntriesPerByte - 1 rather than a hard-coded 3,
                        # so the skip always matches the emitted block size.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this hands an empty list to the entry formatter, which
                    #               expects an instruction object - confirm this is reachable.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # Emit a dummy entry if the table would otherwise be empty.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The declaration uses the same name as the definition (no extra 'Range' suffix).
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6646
# When run as a script: generate the disassembler tables (written to stdout
# by default) and use the returned value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6649
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette