VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 100840

Last change on this file since 100840 was 100840, checked in by vboxsync, 20 months ago

VMM/IEM: More conversion from IEM_MC_MEM_MAP to IEM_MC_MEM_MAP_XXX. Correct 32-bit size BT instruction to not clear high bits. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 273.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 100840 2023-08-09 17:52:37Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 100840 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 hacks:
# Python 3 has no separate 'long' type, so the name is aliased to 'int' when
# running under Python 3 or later.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## EFLAGS constants: X86_EFL_XXX name -> numeric value.
## The trailing comment on each entry gives the equivalent C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to an X86_EFL_XXX constant name (see
## g_kdX86EFlagsConstants).  A leading '!' on the value marks the mnemonic for
## the flag being clear (e.g. 'nc' = No Carry).
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    # NOTE(review): x86 sets PF for even parity, so the 'po'/'pe' values look
    #               swapped - confirm against the users of these mnemonics
    #               before changing the data.
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
## X86_CR0_XXX name -> numeric value; the trailing comment on each entry gives
## the equivalent C expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':           0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':           0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':           0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':           0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':           0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':           0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':           0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':           0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':           0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':           0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4.
## X86_CR4_XXX name -> numeric value; the trailing comment on each entry gives
## the equivalent C expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':          0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':          0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':           0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':          0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':          0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':          0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':          0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':          0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':       0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT':  0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':         0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':         0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':        0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':      0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':         0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':         0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':          0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
## XSAVE_C_XXX name -> numeric value.  The two XSAVE_C_ALL_XXX entries are
## combined masks of several of the single-bit entries.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          0x00000001,
    'XSAVE_C_SSE':          0x00000002,
    'XSAVE_C_YMM':          0x00000004,
    'XSAVE_C_BNDREGS':      0x00000008,
    'XSAVE_C_BNDCSR':       0x00000010,
    'XSAVE_C_OPMASK':       0x00000020,
    'XSAVE_C_ZMM_HI256':    0x00000040,
    'XSAVE_C_ZMM_16HI':     0x00000080,
    'XSAVE_C_PKRU':         0x00000200,
    'XSAVE_C_LWP':          0x4000000000000000,
    'XSAVE_C_X':            0x8000000000000000,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## The keys are the valid location names; the values are all empty lists here.
## These names are what field 1 of the g_kdOpTypes tuples refers to.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rDX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
214
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## NOTE(review): the _RO/_WO/_RW key suffixes presumably denote read-only,
##               write-only and read-write access - confirm against the users.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),
};

# IDX_ParseFixedReg
# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding name (these are keys in g_kdEncodings).
##    - 1: list with the location of each operand (keys in g_kdOpLocations),
##         or None.
##    - 2: the sub-opcode string, empty if none.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]         opcodesub      ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],         '',            ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '',            ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],         '',            ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '',            ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '11 mr/reg',   ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '!11 mr/reg',  ),
    'M':            ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'R':            ( 'ModR/M',     [ 'reg', ],              '',            ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '',            ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '',            ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],               ''             ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],               ''             ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],               ''             ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],              ''             ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '',            ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg',   ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg',  ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '',            ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg',   ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg',  ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '',            ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '',            ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg',   ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg',  ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '',            ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '11 mr/reg',   ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '!11 mr/reg',  ),

    'FIXED':        ( 'fixed',      None,                    '',            ),
};
417
## \@oppfx values.
## The keys are the valid values; the values are all empty lists here.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## The keys are the valid values; the values are all empty lists here.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Every non-alias entry resolves to itself as its real value.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    'vex.l=1':              [ 'vex.l=1',            'L1',   ], # Was 'vex.l=0', which turned L=1 into L=0.
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
456
## Valid values for \@openc
## Each value is a single element list holding a BS3CG1ENC_XXX name, or None
## for the 'prefix' encoding.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the valid values; the values are all empty lists here.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## Known CPU names.
## The keys are the valid names; the values are all empty tuples here.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
483
## \@opcpuid
## Maps the lower case feature name to the X86_CPUID_XXX constant name for the
## CPUID feature bit.
##
## The 'pclmul', 'monitor' and 'smx' entries used to name the constant of the
## next feature bit up (DTES64, CPLDS and TM2 respectively); they now name
## their own feature bit like all the other entries do.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name; the value is an empty
## string for the hints at the end of the table.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the valid values; the values are all empty lists here.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [], # Same LZ suffix as for '5LZ' above.
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_'-separated word
    of self.sName:
        - 'm' / 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and
          two-digit lowercase hex words are kept as '_<word>';
        - any other word has 'grp' -> 'Grp' and 'map' -> 'Map' replaced
          and its first character uppercased.
    """
    asPieces = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        if sWord in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sWord);
        elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
            # Opcode byte in hex, kept verbatim.
            asPieces.append('_' + sWord);
        else:
            sWord = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sWord[0].upper() + sWord[1:]);
    return ''.join(asPieces);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with the 'g_aDisas'
    prefix turned into 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return sTableName.replace('g_aDisas', 'g_Disas') + 'Range';
811
def isVexMap(self):
    """ Returns True if a VEX map, i.e. the encoding name starts with 'vex'. """
    fIsVex = self.sEncoding.startswith('vex');
    return fIsVex;
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by '+' or '-' (either sign forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation only produces the single entry form.  The bytearray
        holds the value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Convert it to an integer.  int() yields arbitrary precision values on
        # both Python 2 and 3, so the Python 2-only long() is not needed.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string without prefix.  '%x' never emits the '0x'
        # prefix nor the Python 2 'L' suffix.  Negative values need to be
        # manually converted to something non-negative: two's complement is
        # ~(-n - 1), and the inversion is done digit by digit on the string.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex;  # Make sure the top bit is set.

        # Figure the natural size: the first normal size that fits, or else a
        # multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad to the natural size: zeros for non-negative, 'f's for negative.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('0' if iValue >= 0 else 'f') * cNeeded) + sHex;

        # Walk the digit pairs from the end, producing a little endian bytearray.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class; subclasses may override.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic mapping to a constant prefixed by '!'
    requests clearing that flag, any other mnemonic requests setting it.
    """

    ## Mnemonics that represent a flag being zero (cleared).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns (clear-entry, set-entry) when at least one flag is to be
        cleared, otherwise the single entry tuple for the flags to set (see
        TestType.get for the entry format).

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. names at least one zero-value flag.

        The value is split on ',' exactly like get() does and each mnemonic is
        matched as a whole.  A substring search on the entire value would also
        fire for any mnemonic that merely contains one of the zero-value names.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose values are given as comma
    separated flag names (e.g. CR0).

    Each name is uppercased, prefixed with sConstantPrefix and looked up in
    the kdConstantsAndValues dictionary; the values found are ORed together.
    """

    ## NOTE(review): not referenced by the methods of this class; looks like a
    ##               leftover shared with the EFLAGS type - confirm before removing.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names to the TestType.get()
        representation of their combined (ORed) value.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fFlag     = self.kdConstantsAndValues.get(sConstant, None);
            if fFlag is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlag;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance simply records the field name, the assignment operator, the
    value string and the type name; all four must be strings.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' relies on the TestType default of fUnsigned=True, so
    ##               it only sign extends values carrying an explicit '+'/'-' -
    ##               confirm whether fUnsigned=False was intended.
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
        'cr0':  TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':  TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp one of kasOperators; sValue
        and sType are stored as given (both must be strings).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a '<variable><op><value>' condition, where the variable is
    a key of kdVariables, the operator one of kasCompareOps and the value a
    key of that variable's dictionary in kdVariables.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':   'size_o16',
            'o32':   'size_o32',
            'o64':   'size_o64',
        },
        # VEX.L value.
        'vex.l': {
            '0':     'vexl_0',
            '1':     'vexl_1',
        },
        # Execution ring.
        'ring': {
            '0':     'ring_0',
            '1':     'ring_1',
            '2':     'ring_2',
            '3':     'ring_3',
            '0..2':  'ring_0_thru_2',
            '1..3':  'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':    'code_64bit',
            '32':    'code_32bit',
            '16':    'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':  'mode_real',
            'prot':  'mode_prot',
            'long':  'mode_long',
            'v86':   'mode_v86',
            'smm':   'mode_smm',
            'vmx':   'mode_vmx',
            'svm':   'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':    'paging_on',
            'off':   'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };

    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Records the selector after asserting that the variable, the operator
        and the value (for that variable) are all known.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Pairs a location (key of g_kdOpLocations) with an operand type (key of
    g_kdOpTypes).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """
        Returns True if using some form of ModR/M encoding, i.e. if the
        operand type starts with 'E', 'G' or 'M'.
        """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction: encoding (maps, prefix,
    opcode), operands, EFLAGS effects, CPU requirements, tests, the
    implementation names and where in the sources it was found.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items, enclosed
        in angle brackets when fRepr is True.  Items whose value is None are
        left out, which is also how the optional items below are suppressed.
        """
        aoFields = [
            ('opcode',   self.sOpcode),
            ('prefix',   self.sPrefix if self.sPrefix else None),
            ('mnemonic', self.sMnemonic),
        ];
        aoFields.extend([('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperand, oOperand in enumerate(self.aoOperands)]);
        aoFields.extend([
            ('maps',      ','.join([oMap.sName for oMap in self.aoMaps]) if self.aoMaps else None),
            ('encoding',  self.sEncoding),
            ('hints',     ','.join(self.dHints.keys()) if self.dHints else None),
            ('disenum',   self.sDisEnum),
            ('cpuid',     ','.join(self.asCpuIds) if self.asCpuIds else None),
            ('group',     self.sGroup),
            ('unused',    'True' if self.fUnused else None),
            ('invalid',   'True' if self.fInvalid else None),
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
            ('fStub',     'True' if self.fStub else None),
            ('fUdStub',   'True' if self.fUdStub else None),
            ('optags',    str(self.cOpTags) if self.cOpTags else None),
            ('FNIEMOP_XXX',      str(self.iLineFnIemOpMacro)  if self.iLineFnIemOpMacro  != -1 else None),
            ('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro) if self.iLineMnemonicMacro != -1 else None),
        ]);

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aoFields if sValue is not None]);
        if fRepr:
            sRet = '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as its parent.  Any of oMap, sOpcode,
        sSubOpcode and sPrefix that is given (non-empty) replaces the
        corresponding value in the copy.  Lists and the hints dictionary are
        copied shallowly, everything else is shared with the original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # The values that can be overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix if sPrefix else self.sPrefix;
        oCopy.sOpcode    = sOpcode if sOpcode else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Attributes taken over as-is (the flag lists are shared, not duplicated).
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                      'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists getting a new list object holding the same elements. (Deeper copy?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));

        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms are '0xNN' (full hex byte), '/r', '11/r' and '!11/r'
        where r is a single digit placed in bits 3 thru 5.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The remaining forms all end with the single /r digit.
        sDigit = sOpcode[-1:];
        if sDigit.isdigit():
            cchOpcode = len(sOpcode);

            # The /r form:
            if cchOpcode == 2 and sOpcode[0] == '/':
                return int(sDigit) << 3;

            # The 11/r form:
            if cchOpcode == 4 and sOpcode.startswith('11/'):
                return (int(sDigit) << 3) | 0xc0;

            # The !11/r form (returns mod=1):
            ## @todo this doesn't really work...
            if cchOpcode == 5 and sOpcode.startswith('!11/'):
                return (int(sDigit) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        An empty list or None gives zero.  The flags are translated via
        g_kdEFlagsMnemonics and g_kdX86EFlagsConstants; negated ('!')
        mnemonics are not allowed here.
        """
        uRet = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## All the instructions.
g_aoAllInstructions = [] # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## One instruction per name.
g_dAllInstructionsByStat = {} # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each name maps to a list of instructions.
g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: list(Instruction)
1676
## Instruction maps.
##
## Each entry starts with the map name (the 'm' / 'r' name suffixes are the
## modrm.mod==mem / modrm.mod==reg variants, see getDisasTableName).  The
## asLeadOpcodes keyword lists the opcode bytes leading up to the map and
## sSelector names the indexing scheme handled by getInstructionIndex.
## NOTE(review): the InstructionMap constructor is defined outside this chunk;
##               the second positional argument looks like the name of the IEM
##               decoder function table (None / omitted when there is none) -
##               confirm against InstructionMap.__init__.
g_aoInstructionMaps = [
    # One byte opcode map and the groups hanging off it.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    # Two byte (0x0f) opcode map and its groups.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # Three byte (0x0f 0x38 and 0x0f 0x3a) opcode maps.
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX encoded maps and groups (sEncoding starts with 'vex', see isVexMap).
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # 3DNow! (0x0f 0x0f) and XOP encoded maps and groups.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## Lookup table: instruction map name (oMap.sName) -> map object from g_aoInstructionMaps.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## Lookup table: IEM name (oMap.sIemName) -> map object from g_aoInstructionMaps.
## Note! Maps sharing the same sIemName value overwrite each other here (last one wins).
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Bookkeeping record for a single decoder function.

    It mainly exists so that searches for variables used in microcode blocks
    can be scoped to the function the block lives in.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Records the end line and the raw source lines of the function.

        Asserts that the end line has not been set before.
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement inside a microcode block.
    """
    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (list of strings).
        self.asParams = asParams;
        ## Free for use by whoever processes the statement, initially None.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as a single source line, indented by cchIndent
        spaces and terminated by a semicolon and a newline.
        """
        return '%s%s(%s);\n' % (' ' * cchIndent, self.sName, ', '.join(self.asParams),);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderings of all the statements in aoStmts.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Looks for the first statement whose name is a key in dNames.

        Branches of conditional statements are searched too, the if-branch
        before the else-branch.  Returns the statement, None if no hit.

        Note! dNames is a dictionary purely for quick lookup, the values
              are never looked at.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                    or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
            if oHit:
                return oHit;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name marks it as C++ code ('C++' prefix). """
        return self.sName[:len('C++')] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Conditional statement (IEM_MC_IF_XXX) with an if-branch and an optional
    else-branch, each a list of statements.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        super(McStmtCond, self).__init__(sName, asParams);
        # The branches are always private copies, never the caller's lists.
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches, the branch bodies
        being indented four more spaces.  The IEM_MC_ELSE() part is left out
        when the else-branch is empty.
        """
        sPad    = ' ' * cchIndent;
        asParts = [
            sPad + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPad + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPad + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """ Variable declaration statement (IEM_MC_LOCAL, IEM_MC_LOCAL_CONST). """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        ## The variable name.
        self.sVarName    = sVarName;
        ## The variable type.
        self.sType       = sType;
        ## The constant value, None if not const.
        self.sConstValue = sConstValue;
1846
class McStmtArg(McStmtVar):
    """ Argument declaration statement (IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF). """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        super(McStmtArg, self).__init__(sName, asParams, sType, sVarName, sConstValue);
        assert sRefType in ('none', 'local');
        ## The argument number.
        self.iArg     = iArg;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
1855
1856
class McStmtCall(McStmt):
    """ Call statement (IEM_MC_CALL_*). """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        super(McStmtCall, self).__init__(sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index into asParams of the first parameter following the function.
        self.idxParams = iFnParam + 1;
        ## The function being called.
        self.sFn       = asParams[iFnParam];
        ## The parameter at index iRcNameParam, None if that index is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is kept as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        super(McCppGeneric, self).__init__(sName, [sCode,]);
        ## Extra indentation added to whatever renderCode() is asked to use.
        self.cchIndent = cchIndent;
        ## Whether the statement is annotated as decoding related when rendered.
        self.fDecode   = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code with a '// C++ decode' or '// C++ normal'
        annotation in front of every newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sLine       = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sLine.replace('\n', sAnnotation);
1883
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is found in the first
    parameter and the call arguments in the ones following it.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        super(McCppCall, self).__init__(sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams += asArgs;

    def renderCode(self, cchIndent = 0):
        """
        Renders the call on a single line followed by a '// C++ decode' or
        '// C++ normal' annotation.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCall       = '%s(%s);' % (self.asParams[0], ', '.join(self.asParams[1:]),);
        return ' ' * (cchIndent + self.cchIndent) + sCall + sAnnotation;
1903
class McCppCond(McStmtCond):
    """
    A plain C++/C 'if' statement with an optional 'else' branch.

    The condition expression is kept as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        super(McCppCond, self).__init__('C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Extra indentation added to whatever renderCode() is asked to use.
        self.cchIndent = cchIndent;
        ## Whether the statement is annotated as decoding related when rendered.
        self.fDecode   = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines and the branch
        bodies indented four more spaces.  The else part is only included
        when the else-branch is non-empty.
        """
        cchTotal = cchIndent + self.cchIndent;
        sPad     = ' ' * cchTotal;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sPad + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sPad + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchTotal + 4),
            sPad + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sPad + 'else ' + sAnnotation + '\n',
                sPad + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchTotal + 4),
                sPad + '}\n',
            ]);
        return ''.join(asParts);
1926
class McCppPreProc(McCppGeneric):
    """
    C++/C preprocessor directive.

    Never flagged as decoding related and rendered without any indentation
    or annotation.
    """
    def __init__(self, sCode):
        super(McCppPreProc, self).__init__(sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is at column zero, cchIndent is ignored. """
        return '%s\n' % (self.asParams[0],);
1936
1937
1938class McBlock(object):
1939 """
1940 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
1941 """
1942
1943 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1944 ## The source file containing the block.
1945 self.sSrcFile = sSrcFile;
1946 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
1947 self.iBeginLine = iBeginLine;
1948 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
1949 self.offBeginLine = offBeginLine;
1950 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
1951 self.iEndLine = -1;
1952 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
1953 self.offEndLine = 0;
1954 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
1955 self.offAfterEnd = 0;
1956 ## The function the block resides in.
1957 self.oFunction = oFunction;
1958 ## The name of the function the block resides in. DEPRECATED.
1959 self.sFunction = oFunction.sName;
1960 ## The block number within the function.
1961 self.iInFunction = iInFunction;
1962 self.cchIndent = cchIndent if cchIndent else offBeginLine;
1963 self.asLines = [] # type: list(str) ##< The raw lines the block is made up of.
1964 ## Decoded statements in the block.
1965 self.aoStmts = [] # type: list(McStmt)
1966
1967 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
1968 """
1969 Completes the microcode block.
1970 """
1971 assert self.iEndLine == -1;
1972 self.iEndLine = iEndLine;
1973 self.offEndLine = offEndLine;
1974 self.offAfterEnd = offAfterEnd;
1975 self.asLines = asLines;
1976
1977 def raiseDecodeError(self, sRawCode, off, sMessage):
1978 """ Raises a decoding error. """
1979 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
1980 iLine = sRawCode.count('\n', 0, off);
1981 raise ParserException('%s:%d:%d: parsing error: %s'
1982 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
1983
1984 def raiseStmtError(self, sName, sMessage):
1985 """ Raises a statement parser error. """
1986 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
1987
1988 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
1989 """ Check the parameter count, raising an error it doesn't match. """
1990 if len(asParams) != cParamsExpected:
1991 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
1992 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
1993 return True;
1994
1995 @staticmethod
1996 def parseMcGeneric(oSelf, sName, asParams):
1997 """ Generic parser that returns a plain McStmt object. """
1998 _ = oSelf;
1999 return McStmt(sName, asParams);
2000
2001 @staticmethod
2002 def parseMcGenericCond(oSelf, sName, asParams):
2003 """ Generic parser that returns a plain McStmtCond object. """
2004 _ = oSelf;
2005 return McStmtCond(sName, asParams);
2006
2007 @staticmethod
2008 def parseMcBegin(oSelf, sName, asParams):
2009 """ IEM_MC_BEGIN """
2010 oSelf.checkStmtParamCount(sName, asParams, 2);
2011 return McBlock.parseMcGeneric(oSelf, sName, asParams);
2012
2013 @staticmethod
2014 def parseMcArg(oSelf, sName, asParams):
2015 """ IEM_MC_ARG """
2016 oSelf.checkStmtParamCount(sName, asParams, 3);
2017 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
2018
2019 @staticmethod
2020 def parseMcArgConst(oSelf, sName, asParams):
2021 """ IEM_MC_ARG_CONST """
2022 oSelf.checkStmtParamCount(sName, asParams, 4);
2023 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
2024
2025 @staticmethod
2026 def parseMcArgLocalRef(oSelf, sName, asParams):
2027 """ IEM_MC_ARG_LOCAL_REF """
2028 oSelf.checkStmtParamCount(sName, asParams, 4);
2029 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
2030
2031 @staticmethod
2032 def parseMcArgLocalEFlags(oSelf, sName, asParams):
2033 """ IEM_MC_ARG_LOCAL_EFLAGS """
2034 oSelf.checkStmtParamCount(sName, asParams, 3);
2035 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2036 return (
2037 McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
2038 McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2039 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
2040 );
2041
2042 @staticmethod
2043 def parseMcLocal(oSelf, sName, asParams):
2044 """ IEM_MC_LOCAL """
2045 oSelf.checkStmtParamCount(sName, asParams, 2);
2046 return McStmtVar(sName, asParams, asParams[0], asParams[1]);
2047
2048 @staticmethod
2049 def parseMcLocalConst(oSelf, sName, asParams):
2050 """ IEM_MC_LOCAL_CONST """
2051 oSelf.checkStmtParamCount(sName, asParams, 3);
2052 return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);
2053
2054 @staticmethod
2055 def parseMcCallAImpl(oSelf, sName, asParams):
2056 """ IEM_MC_CALL_AIMPL_3|4 """
2057 cArgs = int(sName[-1]);
2058 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2059 return McStmtCall(sName, asParams, 1, 0);
2060
2061 @staticmethod
2062 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2063 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2064 cArgs = int(sName[-1]);
2065 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2066 return McStmtCall(sName, asParams, 0);
2067
2068 @staticmethod
2069 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2070 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2071 cArgs = int(sName[-1]);
2072 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2073 return McStmtCall(sName, asParams, 0);
2074
2075 @staticmethod
2076 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2077 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2078 cArgs = int(sName[-1]);
2079 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2080 return McStmtCall(sName, asParams, 0);
2081
2082 @staticmethod
2083 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2084 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2085 cArgs = int(sName[-1]);
2086 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2087 return McStmtCall(sName, asParams, 0);
2088
2089 @staticmethod
2090 def parseMcCallSseAImpl(oSelf, sName, asParams):
2091 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2092 cArgs = int(sName[-1]);
2093 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2094 return McStmtCall(sName, asParams, 0);
2095
2096 @staticmethod
2097 def parseMcCallCImpl(oSelf, sName, asParams):
2098 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2099 cArgs = int(sName[-1]);
2100 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2101 return McStmtCall(sName, asParams, 1);
2102
2103 @staticmethod
2104 def stripComments(sCode):
2105 """ Returns sCode with comments removed. """
2106 off = 0;
2107 while off < len(sCode):
2108 off = sCode.find('/', off);
2109 if off < 0 or off + 1 >= len(sCode):
2110 break;
2111
2112 if sCode[off + 1] == '/':
2113 # C++ comment.
2114 offEnd = sCode.find('\n', off + 2);
2115 if offEnd < 0:
2116 return sCode[:off].rstrip();
2117 sCode = sCode[ : off] + sCode[offEnd : ];
2118 off += 1;
2119
2120 elif sCode[off + 1] == '*':
2121 # C comment
2122 offEnd = sCode.find('*/', off + 2);
2123 if offEnd < 0:
2124 return sCode[:off].rstrip();
2125 sSep = ' ';
2126 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2127 sSep = '';
2128 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2129 off += len(sSep);
2130
2131 else:
2132 # Not a comment.
2133 off += 1;
2134 return sCode;
2135
2136 @staticmethod
2137 def extractParam(sCode, offParam):
2138 """
2139 Extracts the parameter value at offParam in sCode.
2140 Returns stripped value and the end offset of the terminating ',' or ')'.
2141 """
2142 # Extract it.
2143 cNesting = 0;
2144 offStart = offParam;
2145 while offParam < len(sCode):
2146 ch = sCode[offParam];
2147 if ch == '(':
2148 cNesting += 1;
2149 elif ch == ')':
2150 if cNesting == 0:
2151 break;
2152 cNesting -= 1;
2153 elif ch == ',' and cNesting == 0:
2154 break;
2155 offParam += 1;
2156 return (sCode[offStart : offParam].strip(), offParam);
2157
2158 @staticmethod
2159 def extractParams(sCode, offOpenParen):
2160 """
2161 Parses a parameter list.
2162 Returns the list of parameter values and the offset of the closing parentheses.
2163 Returns (None, len(sCode)) on if no closing parentheses was found.
2164 """
2165 assert sCode[offOpenParen] == '(';
2166 asParams = [];
2167 off = offOpenParen + 1;
2168 while off < len(sCode):
2169 ch = sCode[off];
2170 if ch.isspace():
2171 off += 1;
2172 elif ch != ')':
2173 (sParam, off) = McBlock.extractParam(sCode, off);
2174 asParams.append(sParam);
2175 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2176 if sCode[off] == ',':
2177 off += 1;
2178 else:
2179 return (asParams, off);
2180 return (None, off);
2181
2182 @staticmethod
2183 def findClosingBraces(sCode, off, offStop):
2184 """
2185 Finds the matching '}' for the '{' at off in sCode.
2186 Returns offset of the matching '}' on success, otherwise -1.
2187
2188 Note! Does not take comments into account.
2189 """
2190 cDepth = 1;
2191 off += 1;
2192 while off < offStop:
2193 offClose = sCode.find('}', off, offStop);
2194 if offClose < 0:
2195 break;
2196 cDepth += sCode.count('{', off, offClose);
2197 cDepth -= 1;
2198 if cDepth == 0:
2199 return offClose;
2200 off = offClose + 1;
2201 return -1;
2202
2203 @staticmethod
2204 def countSpacesAt(sCode, off, offStop):
2205 """ Returns the number of space characters at off in sCode. """
2206 offStart = off;
2207 while off < offStop and sCode[off].isspace():
2208 off += 1;
2209 return off - offStart;
2210
2211 @staticmethod
2212 def skipSpacesAt(sCode, off, offStop):
2213 """ Returns first offset at or after off for a non-space character. """
2214 return off + McBlock.countSpacesAt(sCode, off, offStop);
2215
2216 @staticmethod
2217 def isSubstrAt(sStr, off, sSubStr):
2218 """ Returns true of sSubStr is found at off in sStr. """
2219 return sStr[off : off + len(sSubStr)] == sSubStr;
2220
2221 koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
2222 koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
2223 + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
2224 + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
2225 + r')');
2226
2227 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2228 """
2229 Decodes sRawCode[off : offStop].
2230
2231 Returns list of McStmt instances.
2232 Raises ParserException on failure.
2233 """
2234 if offStop < 0:
2235 offStop = len(sRawCode);
2236 aoStmts = [];
2237 while off < offStop:
2238 ch = sRawCode[off];
2239
2240 #
2241 # Skip spaces and comments.
2242 #
2243 if ch.isspace():
2244 off += 1;
2245
2246 elif ch == '/':
2247 ch = sRawCode[off + 1];
2248 if ch == '/': # C++ comment.
2249 off = sRawCode.find('\n', off + 2);
2250 if off < 0:
2251 break;
2252 off += 1;
2253 elif ch == '*': # C comment.
2254 off = sRawCode.find('*/', off + 2);
2255 if off < 0:
2256 break;
2257 off += 2;
2258 else:
2259 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2260
2261 #
2262 # Is it a MC statement.
2263 #
2264 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2265 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2266 # Extract it and strip comments from it.
2267 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2268 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2269 if offEnd <= off:
2270 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2271 else:
2272 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2273 if offEnd <= off:
2274 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2275 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2276 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2277 offEnd -= 1;
2278 while offEnd > off and sRawCode[offEnd - 1].isspace():
2279 offEnd -= 1;
2280
2281 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2282
2283 # Isolate the statement name.
2284 offOpenParen = sRawStmt.find('(');
2285 if offOpenParen < 0:
2286 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2287 sName = sRawStmt[: offOpenParen].strip();
2288
2289 # Extract the parameters.
2290 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2291 if asParams is None:
2292 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2293 if offCloseParen + 1 != len(sRawStmt):
2294 self.raiseDecodeError(sRawCode, off,
2295 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2296
2297 # Hand it to the handler.
2298 fnParser = g_dMcStmtParsers.get(sName)[0];
2299 if not fnParser:
2300 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2301 oStmt = fnParser(self, sName, asParams);
2302 if not isinstance(oStmt, (list, tuple)):
2303 aoStmts.append(oStmt);
2304 else:
2305 aoStmts.extend(oStmt);
2306
2307 #
2308 # If conditional, we need to parse the whole statement.
2309 #
2310 # For reasons of simplicity, we assume the following structure
2311 # and parse each branch in a recursive call:
2312 # IEM_MC_IF_XXX() {
2313 # IEM_MC_WHATEVER();
2314 # } IEM_MC_ELSE() {
2315 # IEM_MC_WHATEVER();
2316 # } IEM_MC_ENDIF();
2317 #
2318 if sName.startswith('IEM_MC_IF_'):
2319 if iLevel > 1:
2320 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2321
2322 # Find start of the IF block:
2323 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2324 if sRawCode[offBlock1] != '{':
2325 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2326
2327 # Find the end of it.
2328 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2329 if offBlock1End < 0:
2330 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2331
2332 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2333
2334 # Is there an else section?
2335 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2336 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2337 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2338 if sRawCode[off] != '(':
2339 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2340 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2341 if sRawCode[off] != ')':
2342 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2343
2344 # Find start of the ELSE block.
2345 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2346 if sRawCode[offBlock2] != '{':
2347 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2348
2349 # Find the end of it.
2350 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2351 if offBlock2End < 0:
2352 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2353
2354 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2355 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2356
2357 # Parse past the endif statement.
2358 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2359 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2360 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2361 if sRawCode[off] != '(':
2362 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2363 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2364 if sRawCode[off] != ')':
2365 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2366 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2367 if sRawCode[off] != ';':
2368 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2369 off += 1;
2370
2371 else:
2372 # Advance.
2373 off = offEnd + 1;
2374
2375 #
2376 # Otherwise it must be a C/C++ statement of sorts.
2377 #
2378 else:
2379 # Find the end of the statement. if and else requires special handling.
2380 sCondExpr = None;
2381 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2382 if oMatch:
2383 if oMatch.group(1)[-1] == '(':
2384 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2385 else:
2386 offEnd = oMatch.end();
2387 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2388 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2389 elif ch == '#':
2390 offEnd = sRawCode.find('\n', off, offStop);
2391 if offEnd < 0:
2392 offEnd = offStop;
2393 offEnd -= 1;
2394 while offEnd > off and sRawCode[offEnd - 1].isspace():
2395 offEnd -= 1;
2396 else:
2397 offEnd = sRawCode.find(';', off);
2398 if offEnd < 0:
2399 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2400
2401 # Check this and the following statement whether it might have
2402 # something to do with decoding. This is a statement filter
2403 # criteria when generating the threaded functions blocks.
2404 offNextEnd = sRawCode.find(';', offEnd + 1);
2405 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2406 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2407 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2408 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2409 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2410 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2411 );
2412
2413 if not oMatch:
2414 if ch != '#':
2415 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2416 else:
2417 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2418 off = offEnd + 1;
2419 elif oMatch.group(1).startswith('if'):
2420 #
2421 # if () xxx [else yyy] statement.
2422 #
2423 oStmt = McCppCond(sCondExpr, fDecode);
2424 aoStmts.append(oStmt);
2425 off = offEnd + 1;
2426
2427 # Following the if () we can either have a {} containing zero or more statements
2428 # or we have a single statement.
2429 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2430 if sRawCode[offBlock1] == '{':
2431 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2432 if offBlock1End < 0:
2433 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2434 offBlock1 += 1;
2435 else:
2436 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2437 if offBlock1End < 0:
2438 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2439
2440 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2441
2442 # The else is optional and can likewise be followed by {} or a single statement.
2443 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2444 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2445 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2446 if sRawCode[offBlock2] == '{':
2447 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2448 if offBlock2End < 0:
2449 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2450 offBlock2 += 1;
2451 else:
2452 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2453 if offBlock2End < 0:
2454 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2455
2456 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2457 off = offBlock2End + 1;
2458
2459 elif oMatch.group(1) == 'else':
2460 # Problematic 'else' branch, typically involving #ifdefs.
2461 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2462
2463 return aoStmts;
2464
2465 def decode(self):
2466 """
2467 Decodes the block, populating self.aoStmts if necessary.
2468 Returns the statement list.
2469 Raises ParserException on failure.
2470 """
2471 if not self.aoStmts:
2472 self.aoStmts = self.decodeCode(''.join(self.asLines));
2473 return self.aoStmts;
2474
2475
2476 def checkForTooEarlyEffSegUse(self, aoStmts):
2477 """
2478 Checks if iEffSeg is used before the effective address has been decoded.
2479 Returns None on success, error string on failure.
2480
2481 See r158454 for an example of this issue.
2482 """
2483
2484 # Locate the IEM_MC_CALC_RM_EFF_ADDR statement, if found, scan backwards
2485 # for IEMCPU::iEffSeg references. No need to check conditional branches,
2486 # as we're ASSUMING these will not occur before address calculation.
2487 for iStmt, oStmt in enumerate(aoStmts):
2488 if oStmt.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
2489 while iStmt > 0:
2490 iStmt -= 1;
2491 oStmt = aoStmts[iStmt];
2492 for sArg in oStmt.asParams:
2493 if sArg.find('pVCpu->iem.s.iEffSeg') >= 0:
2494 return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iStmt + 1,);
2495 break;
2496 return None;
2497
2498 koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');
2499 kdDecodeCppStmtOkayAfterDone = {
2500 'IEMOP_HLP_IN_VMX_OPERATION': True,
2501 'IEMOP_HLP_VMX_INSTR': True,
2502 };
2503
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the top-level statements of the block hold exactly one
    IEMOP_HLP_DONE_* / IEMOP_HLP_DECODED_* macro invocation, that no state
    modifying MC statement comes before it, and that no decoding statement
    comes after it.

    Returns None on success, error string on failure.

    The point of the check is safe instruction restarting should the
    recompiler run out of TB resources during recompilation (e.g. aRanges
    or aGCPhysPages entries).
    """

    # Only the statements handed in are scanned; the IEMOP_HLP_DONE_ stuff is
    # not allowed inside conditionals, so nested statements need no look.
    cDoneSeen = 0;
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            # MC statement.
            sMcName = oStmt.sName;
            if sMcName.startswith('IEM_MC_DEFER_TO_CIMPL_') and iStmt == 0:
                cDoneSeen += 1; # A leading defer-to-cimpl is implicitly done decoding.
            elif cDoneSeen == 0:
                # The boolean in the table flags state modifying statements;
                # unknown names are treated as harmless.
                if g_dMcStmtParsers.get(sMcName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);
            elif sMcName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);
            continue;

        # C++ statement: classify it by its first word, if it has one.
        oFirstWord = self.koReCppFirstWord.match(oStmt.asParams[0]);
        if not oFirstWord:
            continue;
        sWord = oFirstWord.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDoneSeen += 1;
        elif cDoneSeen > 0 and oStmt.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);

    if cDoneSeen == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneSeen > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2545
def check(self):
    """
    Runs the sanity checks on the decoded block.

    Returns a list with the error string of each failing check (in the
    order the checks are run), empty if all is fine.
    """
    aoStmts  = self.decode();
    asErrors = [];
    for fnChecker in (self.checkForTooEarlyEffSegUse, self.checkForDoneDecoding):
        sError = fnChecker(aoStmts);
        if sError:
            asErrors.append(sError);
    return asErrors;
2563
2564
2565
## IEM_MC_XXX -> parser + info dictionary.
#
# Each value is a (parser method, info) tuple.  The info is currently a single
# boolean entry indicating whether the statement modifies state and must not
# be used before IEMOP_HLP_DONE_*.  McBlock.checkForDoneDecoding() reads that
# boolean and treats names missing from this table as not state modifying.
#
# The raw table was generated via the following command
# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
g_dMcStmtParsers = {
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_ARG': (McBlock.parseMcArg, False),
    'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False),
    'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False),
    'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False),
    'IEM_MC_ASSIGN': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_U8_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_U32_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_BEGIN': (McBlock.parseMcGeneric, False),
    'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False),
    'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True),
    'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True),
    'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True),
    'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True),
    'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True),
    'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True),
    'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True),
    'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True),
    'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_END': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_S32_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True),
    'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True),
    'IEM_MC_LOCAL': (McBlock.parseMcLocal, False),
    'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False),
    'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_EX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True),
    'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_POP_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_POP_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_POP_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False),
    'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
};
2971
## List of microcode blocks.
# Starts out empty here; per the type comment the elements are McBlock instances.
g_aoMcBlocks = [] # type: list(McBlock)
2974
2975
2976
class ParserException(Exception):
    """ Exception carrying a parser error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2981
2982
2983class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2984 """
2985 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2986 """
2987
## @name Parser state.
## Values for the iState attribute; the constructor starts out with kiCode.
## @{
kiCode = 0;          ##< NOTE(review): presumably 'scanning ordinary code' - confirm against the state machine.
kiCommentMulti = 1;  ##< NOTE(review): presumably 'inside a multi-line comment' - confirm against the state machine.
## @}
2993
class Macro(object):
    """
    A preprocessor style macro: name, optional parameter list and body.

    expandMacro() substitutes the parameters in the body, including the
    removal of '##' token pasting operators next to a parameter.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName       = sName;   ##< The macro name.
        self.asArgs      = asArgs;  ##< None if simple macro, list of parameters otherwise.
        self.sBody       = sBody;   ##< The macro body text that expandMacro() works on.
        self.iLine       = iLine;   ##< Line number supplied by the creator of the object.
        ## Matches a parameter name as a whole word, together with any '##'
        #  operator (and surrounding whitespace) in front of or after it.
        #  None if the macro takes no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): As written this is true for every whitespace character
        # (the '(' comparison cannot fail once isspace() holds).  Left as-is
        # since it only influences cosmetic spacing - confirm the intent.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs holds one replacement string per macro
        parameter (same order as self.asArgs) and must be empty or None for
        a macro without parameters.

        Returns the expanded body string.
        """
        _ = oParent;
        sBody = self.sBody;

        if not self.oReArgMatch:
            assert not asArgs;
            return sBody;

        assert len(asArgs) == len(self.asArgs);
        #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
        dArgs = dict(zip(self.asArgs, asArgs));

        # Do all matching against the unmodified body in a single pass.
        # Re-searching the partially expanded text (as was done previously)
        # evaluates \b against the tail of the value just inserted, so the
        # second parameter in 'a_First ## a_Second' was never expanded when
        # the first value ended with a word character.  The single pass also
        # guarantees that inserted values are never rescanned.
        asPieces = [];
        offDone  = 0;
        for oMatch in self.oReArgMatch.finditer(sBody):
            #oParent.debug('%s %s..%s (%s)' % (oMatch.group(2), oMatch.start(), oMatch.end(), oMatch.group()));
            # NOTE(review): The 'pre' test inspects the first character of the
            # match rather than the one before it, so it looks like it can
            # never trigger for identifier parameters.  Kept unchanged since
            # it only affects cosmetic spacing - confirm the intent.
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';
            asPieces.append(sBody[offDone : oMatch.start()]);
            asPieces.append(sPre + dArgs[oMatch.group(2)] + sPost);
            offDone = oMatch.end();
        asPieces.append(sBody[offDone : ]);

        return ''.join(asPieces);
3035
3036
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile and asLines are stored as given.  sDefaultMap must be a key in
    g_dInstructionMaps (asserted).  If oInheritMacrosFrom is given, its
    macro dictionary is copied and its macro matching expression is shared.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re      ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # All patterns are raw strings: sequences such as \s, \d, \[ and \A are
    # not valid string escapes and trigger warnings in non-raw literals on
    # newer Python versions.  The compiled expressions are unchanged.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##< Not DEFER_TO_CIMPL_0_RET!
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Comment tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both '@optestN' and '@optest[N]' for N in 0..47.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
3121
def raiseError(self, sMessage):
    """
    Raises a ParserException carrying sMessage, prefixed with the source
    file name and the current line number (self.iLine).
    """
    sWhere = "%s:%d" % (self.sSrcFile, self.iLine,);
    raise ParserException("%s: error: %s" % (sWhere, sMessage,));
3127
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment rather than self.iLine.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sWhere   = "%s:%d" % (self.sSrcFile, iErrLine,);
    raise ParserException("%s: error: %s" % (sWhere, sMessage,));
3133
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Always returns False so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3141
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number iLine in
    self.asErrors.

    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3149
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.

    Always returns False.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3157
def printErrors(self):
    """
    Writes all the collected error messages to stderr in one go.

    Returns the number of collected errors (zero if none).
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3166
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix to stderr, provided that
    self.fDebug is set.  Does nothing otherwise.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3173
def stripComments(self, sLine):
    """
    Replaces each comment matched by self.oReComment in sLine with a single
    space and returns the result.

    If a comment opener or closer is still present afterwards (i.e. part of
    a multi-line comment), an error is recorded; the line is returned anyway.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3184
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the table declaration line (holding the table name and the
    '[...]' part); the table body is read from self.asLines, starting at
    self.iLine.

    Returns True if the table was consumed and has the expected number of
    entries.  Otherwise an error is recorded and False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps the indexes valid while inserting/deleting.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Find an instruction that matches or can adopt this
                    # opcode + prefix combination.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None fits: clone the first one for this table slot.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3290
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it both in
    g_aoAllInstructions and in the list of current instructions.

    iLine is the line number handed to the Instruction constructor; when it
    is None the current line (self.iLine) is used instead.

    Returns the new instruction.
    """
    iCreatedLine = iLine;
    if iCreatedLine is None:
        iCreatedLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iCreatedLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3299
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise the
    lower-cased part before the first underscore becomes the mnemonic, and,
    if the instruction has no operands yet, each of the remaining
    underscore-separated parts is looked up in g_kdOpTypes and appended as
    an operand.

    Returns False upon hitting an unknown operand type (operands appended up
    to that point are kept), True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asTypes = asParts[1:];
    if not asTypes or oInstr.aoOperands:
        return True;

    for sType in asTypes:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3315
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by filling in defaults for
    whatever was not specified and registering it in the global lookup
    dictionaries.

    oInstr is the instruction to complete, iLine the line number to record
    as its completion line.  Always returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Note! There is currently no special processing for neither specified
    #       instructions (cOpTags > 0) nor unspecified legacy stuff like:
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #       Both only get the common defaults below.
    #

    # No mnemonic? Try guess it and the operands from the stats name,
    # falling back on the function name (sans prefix).
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # The disassembler op enum constant defaults to OP_<MNEMONIC>.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name defaults to the function name sans prefix,
    # or alternatively the mnemonic with the operand types appended.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The IEM function name defaults to iemOp_ + mnemonic and operand types,
    # or alternatively iemOp_ + the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map if none was given, then add the instruction to
    # each of its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from the operands and maps if not specified.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            # Operand-less: ModR/M only for unused encodings with a sub-opcode.
            sBase = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
            oInstr.sEncoding = ('VEX.' + sBase) if oInstr.onlyInVexMaps() else sBase;
        elif oInstr.aoOperands[0].usesModRM():
            fVex =    (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                   or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Add to the stats name indexed dictionary, complaining about duplicates.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to the function name indexed dictionary.  There can be multiple
    # instructions per function, so each value is a list.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3421
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the current
    instruction list and comment text.

    iLineInComment  - Line within the current comment to use as completion
                      line (added to self.iCommentLine).  If None, the
                      current line (self.iLine) is used.
    fEndOfFunction  - When set, the current function (if any) is completed
                      too and the per-function state is reset.

    Always returns True.
    """
    if iLineInComment is None:
        iDoneLine = self.iLine;
    else:
        iDoneLine = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.aoCurInstrs = [];
    self.sComment    = '';

    if fEndOfFunction:
        #self.debug('%s: oCurFunction=None' % (self.iLine, ));
        oFunction = self.oCurFunction;
        if oFunction:
            # Hand the function its source lines, from its first line up to the current one.
            oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
        self.oCurFunction   = None;
        self.iMcBlockInFunc = 0;
    return True;
3442
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is True (the actual True object, not just something
    truthy), only attributes currently holding None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3454
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, padding the array with None entries as needed.

    Unless fOverwrite is True (the actual True object), only entries
    currently holding None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
3466
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word beginning with '0x' or '0X', everything after 'Opcode' is stored as
    the sRawOldOpcodes attribute of the current instructions (without
    overwriting existing values), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3482
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed, adding one
    (created at comment line iTagLine) if there are no current instructions.

    Bumps the op-tag count of every current instruction, and the total count
    of tagged instructions for each one getting its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1;
        if oCurInstr.cOpTags == 1:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
3492
3493 @staticmethod
3494 def flattenSections(aasSections):
3495 """
3496 Flattens multiline sections into stripped single strings.
3497 Returns list of strings, on section per string.
3498 """
3499 asRet = [];
3500 for asLines in aasSections:
3501 if asLines:
3502 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3503 return asRet;
3504
3505 @staticmethod
3506 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3507 """
3508 Flattens sections into a simple stripped string with newlines as
3509 section breaks. The final section does not sport a trailing newline.
3510 """
3511 # Typical: One section with a single line.
3512 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3513 return aasSections[0][0].strip();
3514
3515 sRet = '';
3516 for iSection, asLines in enumerate(aasSections):
3517 if asLines:
3518 if iSection > 0:
3519 sRet += sSectionSep;
3520 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3521 return sRet;
3522
3523
3524
3525 ## @name Tag parsers
3526 ## @{
3527
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters (a
    trailing '.' is added if missing) and it must not overwrite a brief
    that is already set.  Returns True on success; otherwise an error is
    recorded and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first dot followed by a space (or the final dot); dots
    # followed by anything else are not counted as sentence ends.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3557
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are added
    to the end of the instruction's asDescSections list.  Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        for sSection in self.flattenSections(aasSections):
            oInstr.asDescSections.append(sSection);
    return True;
3572
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success.  An invalid value or an attempt to replace an
    already set mnemonic is recorded as an error and False is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to overwrite.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
3595
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations; when
    omitted it is taken from the second element of the g_kdOpTypes entry for
    the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The last character of the tag gives the (one based) operand number.
    Returns True on success; otherwise an error is recorded and False is
    returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    # Skipped operand slots are padded with None.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3645
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each name must be a key in g_dInstructionMaps and none of them may
    already be assigned to the instruction.  Returns True on success;
    otherwise an error is recorded and False is returned.  (A name listed
    twice in the value is reported as an error too, but does not fail the
    call.)
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten into a comma separated list, split it up and validate the names.
    asMaps = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Complain if any of them is assigned to the instruction already.
    for oOldMap in oInstr.aoMaps:
        if oOldMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oOldMap.sName));

    # Add the new maps, complaining about duplicates within the value itself.
    for sMap in asMaps:
        oNewMap = g_dInstructionMaps[sMap];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
3680
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None.  A two character value gets '0x' prepended.
    Anything but 'n/a' must be a key in g_kdPrefixes.  Returns True on
    success; otherwise an error is recorded and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3718
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' digit must be in the range 0 thru 7, as the ModR/M reg field
    is three bits wide.  Returns True on success; otherwise an error is
    recorded and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/')    and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/')  and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '01234567':
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3748
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored is the first
    element of the matching entry.  Returns True on success; otherwise an
    error is recorded and False is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to overwrite.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));

    oInstr.sSubOpcode = sSubOpcode;
    return True;
3775
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or passes _isValidOpcodeByte.  Returns True on
    success; otherwise an error is recorded and False is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    if     sEncoding not in g_kdEncodings \
       and sEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to overwrite.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
3802
## Translates an EFLAGS tag name to the name of the Instruction attribute
#  holding the flag list for that tag.
kdOpFlagToAttr = dict((
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
));
3811
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or just 'none' (any case) for an empty list.  The list is stored in the
    instruction attribute that self.kdOpFlagToAttr associates with the tag.

    Returns True on success.  Invalid flag names (all of them are reported)
    or an attempt to replace an already set list yield False.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Give it another chance with the surrounding whitespace removed.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;

    # Set them, refusing to overwrite.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    return True;
3843
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints; the single value 'none' (any case)
    gives an empty list.  Valid hints are added as keys to the instruction's
    dHints dictionary.  Returns False if any hint is invalid (all of them
    are reported), True otherwise.  Duplicate hints are reported as errors
    too, but do not fail the call.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Update the list entry (strings cannot be modified in place).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3877
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The first word of the value must match self.oReDisEnum.  Extra words are
    reported as an error but otherwise ignored.  Returns True if the value
    was set; a missing or invalid value, or an attempt to replace an already
    set value, yields False.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the first word.
    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it, refusing to overwrite.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;

    return True;
3906
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key in g_kdCpuNames; extra words
    are reported as an error but otherwise ignored.  Returns False if the
    value is missing or invalid, True otherwise.  An attempt to replace an
    already set CPU name with a different one is reported as an error and
    the old name is kept, but the call still returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned before this check, or overwrites go undetected.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3936
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys from
    g_kdCpuIdFlags; the single value 'none' (any case) gives an empty list.
    Valid flags are appended to the instruction's asCpuIds list.  Returns
    False if any flag is invalid (all of them are reported), True otherwise.
    Duplicates are reported as errors too, but do not fail the call.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Update the list entry (strings cannot be modified in place).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3970
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  og_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word and match self.oReGroupName.  Returns
    True on success; an invalid value or an attempt to replace an already
    set group is recorded as an error and False is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if self.oReGroupName.match(sGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Set it, refusing to overwrite.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sGroup,));
    oInstr.sGroup = sGroup;

    return True;
3996
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style word is accepted and it must be a key of
    g_kdInvalidStyles.  Only one of the three tags may be given per
    instruction.  Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behaviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        # Sorted list rather than the raw keys view, so the message reads the
        # same on python 2 and 3 (matches what parseTagOpXcptType does).
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, sorted(g_kdInvalidStyles.keys()),));
    # Set it.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
4034
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  A test that parses
    cleanly is appended to oInstr.aoTests; problems are reported thru
    errorComment() and the test is dropped.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->') and finally selectors (after '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  The current bucket must be compared by
            # identity: two still-empty lists compare equal, which used to hide
            # repeated or misplaced separators.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand that expand to a full '<variable><op><value>' expression.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                # A duplicate variable is reported, but the selector is still added.
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # '<value>[:<type>]', the type defaulting to the one of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we got for debugging.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4179
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number must equal the count of tests already collected for the
    instruction; a mismatch is reported, but the test is parsed regardless.
    Returns whatever parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # '@optest[' is eight characters long, '@optest' seven.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4195
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: the
    tag and its value are accepted and dropped.  Always returns True.

    See also \@oponlytest.
    """
    # Nothing to do; just reference the arguments to keep pylint quiet.
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
4207
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid and duplicate references are
    reported and skipped.  Returns True, or the errorComment() result when
    no reference was given at all.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The copy jobs are only recorded here.  They are executed after all the
    # input has been parsed so that forward references can be handled.
    asRefs = self.flattenAllSections(aasSections).split();
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4236
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already there.  Returns True, or the errorComment() result if the tag
    was given a value.

    See also \@optestignore.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value, so anything but whitespace is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if not sUnexpected:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);

        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));
4259
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word is accepted, it must be a key of g_kdXcptTypes and the
    instruction must not have a type yet.  Returns True when the type was
    stored in oInstr.sXcptType, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and work out what (if anything) is wrong.
    asWords = self.flattenAllSections(aasSections).split();
    sError  = None;
    if len(asWords) != 1:
        sError = '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,);
    elif asWords[0] not in g_kdXcptTypes:
        sError = '%s: invalid invalid exception type: %s (valid: %s)' % (sTag, asWords[0], sorted(g_kdXcptTypes.keys()),);
    elif oInstr.sXcptType is not None:
        sError = '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)' % ( sTag, oInstr.sXcptType, asWords[0],);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    # All good, set it.
    oInstr.sXcptType = asWords[0];

    _ = iEndLine;
    return True;
4286
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or isn't done yet.

    The flattened value must match self.oReFunctionName and the instruction
    must not have a function name yet.  Returns True when the name was stored
    in oInstr.sFunction, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out what (if anything) is wrong with it.
    sName  = self.flattenAllSections(aasSections);
    sError = None;
    if not self.oReFunctionName.match(sName):
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sName, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sName,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sFunction = sName;

    _ = iEndLine;
    return True;
4314
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or isn't done yet.

    The flattened value must match self.oReStatsName and the instruction must
    not have a statistics name yet.  Returns True when the name was stored in
    oInstr.sStats, otherwise the result of errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out what (if anything) is wrong with it.
    sName  = self.flattenAllSections(aasSections);
    sError = None;
    if not self.oReStatsName.match(sName):
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sName, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sName,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sStats = sName;

    _ = iEndLine;
    return True;
4342
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of doneInstructions(), or that of errorComment() if
    the tag was given a value.
    """
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        _ = sTag; _ = iEndLine;
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4355
4356 ## @}
4357
4358
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    The comment is split into lines, which are then grouped by '@tag'.  The
    text preceding the first tag is collected under the pseudo tag
    '@default'.  Within a tag, empty lines separate sections.  Each tag found
    in self.dTagHandlers is dispatched to its handler as
    (sTag, aasSections, iTagLine, iEndLine).

    Returns False if the comment holds neither 'Opcode' nor '@' and is thus
    not looked at, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Advance the comment start line for each leading empty line dropped.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;           # Number of tags dispatched to a handler.
    sFlatDefault = None;        # The flattened untagged text, if any.
    sCurTag      = '@default';  # The tag currently being collected.
    iCurTagLine  = 0;           # Index into asLines of the current tag.
    asCurSection = [];          # The section currently being filled.
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text line goes into the current section, while an empty line
            # starts a new section unless the current one is still empty.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # (Drop the trailing section if it is empty and not the only one.)
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag is the first word (lower cased), any text following
            # it on the same line becomes the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # Note! Unlike in the loop above, no 'Did you mean' hints are given here.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4459
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is the text between offStartInvocation and the first
    open parenthesis following it.  Arguments are split on commas at nesting
    depth one, with commas and parentheses inside single or double quotes
    being ignored.  If the closing parenthesis is not within sInvocation,
    parsing continues with self.asLines[self.iLine] and onwards.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # First the name.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Arguments.
    iNextLine   = self.iLine;       # Index into self.asLines of the next line to append.
    cNesting    = 1;                # Parenthesis nesting depth.
    offCur      = offParen + 1;     # Current offset into sInvocation.
    offArgStart = offCur;           # Where the current argument starts.
    offLastLine = 0;                # Where the last appended line starts in sInvocation.
    chQuote     = None;             # The quote character when inside a string/char literal.
    while cNesting > 0:
        # Fetch another line when running out of text.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLastLine, iNextLine - self.iLine);
            offLastLine  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chQuote is not None:
            # Inside quotes: Skip escaped characters and look for the end quote.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chQuote:
                chQuote = None;
        elif chCur == '(':
            cNesting += 1;
        elif chCur == ')' or chCur == ',':
            # Only top level separators terminate an argument.
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        elif chCur == '"' or chCur == '\'':
            chQuote = chCur;
        offCur += 1;

    return (asArgs, offCur - offLastLine, iNextLine - self.iLine);
4516
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if it is followed by an open
    parenthesis (optionally preceded by whitespace).

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! Using startswith() rather than indexing the first character, as
    #       the latter raises IndexError when only whitespace (or nothing at
    #       all) follows the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4526
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx() that only returns the
    argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
4532
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in the order given, stopping at the first
    one found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no macro after the first hit is looked for.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oAttempts if asRet is not None), None);
4542
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross checked against what the current (last)
    instruction already got from the @op* tags, and anything not yet set
    (mnemonic, operands, encoding, statistics name, hints) is taken from
    the macro.  Mismatches are reported thru self.error().

    Nothing is applied if sIemHints contains IEMOPHINT_SKIP_PYTHON.  The sAsm
    parameter is currently unused.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note! str.find() returns -1 (which is true in a boolean context) when
                    #       there is no match and 0 for a match at the very start, so the
                    #       VEX/XOP presence tests must compare explicitly against zero.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not (sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0) \
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Any operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4682
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros lack the explicit statistics name and assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx(), whose result is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4692
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function, makes it the current
    block (self.oCurMcBlock), adds it to g_aoMcBlocks and bumps the block
    counters.  Calls raiseError() if there is no current function or if a
    block is already open.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    # (rfind returns -1 if there is no preceding newline, making the line start at offset zero.)
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4721
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
    extracting a statement block from a string that's the result of macro
    expansion and therefore contains multiple "sub-lines" as it were.

    The string is cut after the first newline at or following offEnd and
    before the sub-line containing offBegin.  Should that sub-line not start
    with sBeginStmt (whitespace aside), the cut is made at offBegin instead.

    Returns list of lines covering offBegin thru offEnd in sRawLine, each
    with a newline appended.
    """
    # Drop everything following the sub-line with the end statement.
    offNewline = sRawLine.find('\n', offEnd);
    sClipped   = sRawLine[:offNewline + 1] if offNewline > 0 else sRawLine;

    # Drop everything preceding the sub-line with the begin statement.
    offLineStart = sClipped.rfind('\n', 0, offBegin) + 1;
    sBlock       = sClipped[offLineStart:];
    if not sBlock.strip().startswith(sBeginStmt):
        sBlock = sBlock[offBegin - offLineStart:];

    asResult = [];
    for sSubLine in sBlock.split('\n'):
        asResult.append(sSubLine + '\n');
    return asResult;
4742
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current block (self.oCurMcBlock), cuts
    the final line off right after the 'IEM_MC_END();' statement, hands it
    all to the block's complete() method and clears self.oCurMcBlock.
    Calls raiseError() if no block is open or the final line is malformed.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');
    oBlock = self.oCurMcBlock;

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine <= oBlock.iBeginLine:
        asBlockLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                               oBlock.offBeginLine, offEndStatementInLine);
    else:
        asBlockLines = self.asLines[oBlock.iBeginLine - 1 : self.iLine];
        if not asBlockLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asBlockLines[-1].strip() == '':
        asBlockLines.pop();
    sLast       = asBlockLines[-1];
    offEndStmt  = sLast.find('IEM_MC_END');
    if offEndStmt < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sLast,));
    offAfter    = offEndStmt + len('IEM_MC_END');

    # Step over '(', ')' and ';' in that order, each optionally preceded by whitespace.
    for chExpected, sErrFmt in ( ('(', 'bogus IEM_MC_END: Expected "(" at %s: %s'),
                                 (')', 'bogus IEM_MC_END: Expected ")" at %s: %s'),
                                 (';', 'bogus IEM_MC_END: Expected ";" at %s: %s'), ):
        while sLast[offAfter].isspace():
            offAfter += 1;
        if sLast[offAfter] != chExpected: self.raiseError(sErrFmt % (offAfter, sLast,));
        offAfter += 1;

    asBlockLines[-1] = sLast[: offAfter];

    #
    # Complete and discard the current block.
    #
    oBlock.complete(self.iLine, offEndStatementInLine,
                    offEndStatementInLine + offAfter - offEndStmt, asBlockLines);
    self.oCurMcBlock = None;
    return True;
4806
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    sCode is the code string being scanned, offBeginStatementInCodeStr the
    offset of the statement within sCode, offBeginStatementInLine the offset
    of it within the source line, and cParams the digit from the macro name.

    The statement is turned into a complete McBlock of its own holding a
    single McStmtCall, which is appended to g_aoMcBlocks.  self.iLine is
    advanced by the number of extra lines the invocation spans.

    Returns True.  Problems are reported thru self.raiseError and asserts.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        # NOTE(review): 'blocki' in the message below looks like a typo for 'block'.
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statement.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 3:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s!'
                        % (sStmt, len(asArgs), cParams + 3,));

    # The block consists of this one statement only: asArgs[0] and the remaining arguments.
    oMcBlock.aoStmts = [McStmtCall(asArgs[0], asArgs[1:], 1),];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        # Plain source lines: take the line(s) of the invocation and cut the last one after the ';'.
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        # The current line has embedded newlines (more than one '\n'), so let the helper split it up.
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    # Register the block and update the counters.
    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
4871
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    Decoder functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros, so asArgs is the parsed macro invocation with
    the macro name as the 0th entry and the function name as the 1st.

    Returns True.
    """
    # Close the function we were in (if any); it ends on the previous line.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Open the new one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4887
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code snippet to scan and offLine the offset of the snippet
    within the current source line (added to the match offsets passed on to
    the IEM_MC_XXX workers).

    Returns True if a function definition macro or any IEM_MC_ text was
    found in the snippet, otherwise False.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to attach the function to and
            # record the line of the macro, complaining if one was already seen.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # A stub has no body, so the instruction(s) are done right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants require the 'vex_l_zero' hint; complain if it is
            # missing on an instruction with a mnemonic macro and add it.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    # NOTE(review): nesting of the next call was reconstructed from a listing
                    #               without indentation - confirm it is not inside the 'if' above.
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # In the calls below the operand arguments are collected into a
            # list that is passed as the last parameter to the worker.

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # Anything else is taken to be DEFER_TO_CIMPL_<n>...; the
                    # character following that prefix is the parameter count.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5031
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression matches any of the macro names in self.dMacros
    at a word boundary.  Macros with an argument list (asArgs is not None)
    must be followed by optional whitespace and an opening parenthesis,
    which is included in the match; the others must end at a word boundary.

    self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if self.dMacros:
        # Raw strings are used for the regex fragments since '\s' and '\('
        # are invalid escape sequences in ordinary string literals.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
5052
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, where sRest is what follows the directive word.

    Only macros with IEM_MC_BEGIN in the body are recorded in self.dMacros,
    the rest are ignored.  self.iLine is advanced past continuation lines.

    Returns True, or whatever self.error returns if the definition cannot
    be parsed.
    """

    #
    # Join up continuation lines, keeping the newline characters but
    # dropping the backslash escaping them.
    #
    iLineStart = self.iLine;
    cLines     = len(self.asLines);
    while self.iLine < cLines and sRest.endswith('\\\n'):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreProcessDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split out the name, argument list and body using the first regex,
    # falling back on the second one (no argument list) if it fails.
    #
    asArgs = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreProcessDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros someone making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    fRelevant = sBody.find("IEM_MC_BEGIN") >= 0;
    if not fRelevant:
        #self.debug('workerPreProcessDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the matching regex.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreProcessRecreateMacroRegex();
5110
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, where sRest is what follows the directive word.

    Returns True when the macro isn't one we're tracking, otherwise the
    result of recreating the macro regex.
    """
    # Isolate the name, chopping off anything from the first slash (comment).
    offSlash = sRest.find('/');
    sName    = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we're tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
5129
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive line.

    Dispatches #define and #undef lines to their workers, passing on the
    first regex group with a newline appended.

    Returns the worker result, or False if the line is neither.
    """
    aoDispatch = (
        (self.oReHashDefine, self.workerPreProcessDefine),
        (self.oReHashUndef,  self.workerPreProcessUndef),
    );
    for oRegex, fnWorker in aoDispatch:
        oMatch = oRegex.match(sLine);
        if oMatch:
            return fnWorker(oMatch.group(1) + '\n');
    return False;
5142
def expandMacros(self, sLine, oMatch):
    """
    Expands a macro we know about in the given line.

    Currently we ASSUME there is only one macro invocation in the line and
    that it is the one oMatch (a self.oReMacros match on sLine) found.

    Returns the line with the invocation replaced by the expansion.
    """
    #
    # Get our bearings.
    #
    offMatch = oMatch.start();
    sName    = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    if fWithArgs:
        cDepth     = 1;
        offArg     = oMatch.end();
        offClose   = -1;
        asActuals  = [];
        for offChr in range(oMatch.end(), len(sLine)):
            ch = sLine[offChr];
            if ch == '(':
                cDepth += 1;
            elif ch == ')':
                cDepth -= 1;
                if cDepth == 0:
                    # The parenthesis closing the invocation.
                    asActuals.append(sLine[offArg:offChr].strip());
                    offClose = offChr;
                    break;
            elif ch == ',' and cDepth == 1:
                # Top-level comma, so this ends the current argument.
                asActuals.append(sLine[offArg:offChr].strip());
                offArg = offChr + 1;
        else:
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

        if len(oMacro.asArgs) == 0 and asActuals == ['']: # trick for empty parameter list.
            asActuals = [];
        if len(oMacro.asArgs) != len(asActuals):
            self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

        #
        # Do the expanding.
        #
        if self.fDebugPreProc:
            self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asActuals));
        return sLine[:offMatch] + oMacro.expandMacro(self, asActuals) + sLine[offClose + 1 :];

    #
    # Simple macro invocation w/o parameters.
    #
    if self.fDebugPreProc:
        self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
    return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[oMatch.end():];
5201
def parse(self):
    """
    Parses the given file.

    Walks self.asLines, switching between the code and multi-line comment
    states, expanding known macros in code lines and dispatching code
    snippets, comments, preprocessor lines and function tables to the
    respective workers.

    Returns number or errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        # Note: self.iLine is incremented right away, so from here on it is the
        #       1-based number of the line in sLine.
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Store the expanded line so the workers see the expansion too.
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Anything but a C++ comment start ('//') while in code goes thru the state loop.
            if offSlash + 1 >= len(sLine)  or  sLine[offSlash + 1] != '/'  or  self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: parse it and go back to code state.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (     self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # Wrap up any pending instructions and report the statistics for the file.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5307
## The parsed content of IEMAllInstCommonBodyMacros.h.
# This is None until __parseFileByName is called the first time.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5310
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the path of the file to parse and sDefaultMap is passed on
    to the SimpleParser constructor.

    On the first call IEMAllInstCommonBodyMacros.h is located (next to
    sSrcFile, else next to this script), parsed and cached in
    g_oParsedCommonBodyMacros for use by this and all subsequent parsers.

    Returns a tuple with the parse() result (error count) and the parser.
    Raises Exception if a file cannot be opened or read, and re-raises any
    ParserException after printing it to stderr.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to define macros, so finding
        # any instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must match the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5372
5373
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry is looked up in g_dAllInstructionsByStat first
    and then in g_dAllInstructionsByFunction, and the tests of the
    instruction(s) found are appended to those of the destination.

    Returns the number of errors (written to stderr).
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or []):
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing self references.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5401
5402
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list is non-empty, aoTests is emptied for every other
    instruction so that only the listed ones get tested.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5414
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
# Each entry is a (filename, default map name, file set number) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h',    'one',        -1, ),
    ( 'IEMAllInstOneByte.cpp.h',   'one',         1, ),
    ( 'IEMAllInst3DNow.cpp.h',     '3dnow',       2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f',       2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38',   3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a',   3, ),
    ( 'IEMAllInstVexMap1.cpp.h',   'vexmap1',     4, ),
    ( 'IEMAllInstVexMap2.cpp.h',   'vexmap2',     4, ),
    ( 'IEMAllInstVexMap3.cpp.h',   'vexmap3',     4, ),
);
5427
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a list of (filename, default map) pairs.  A bare
    filename that does not exist in the current directory is looked for in
    the directory of this script.

    After parsing, the test copying and only-test filtering are applied and
    the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! max() avoids dividing by zero when no instructions were found.
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5458
5459
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is looked up by its (case-insensitive)
    base name in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sName = os.path.split(sFilename)[1].lower();
        # Take the map of the first table entry with a matching filename.
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # Let __parseFilesWorker do the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap);
5481
5482
def parseAll():
    """
    Parses all the files listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the filename and default map, not the file set.
    asFilesAndDefaultMap = [(sFilename, sDefaultMap)
                            for sFilename, sDefaultMap, _iSet in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap);
5491
5492
5493#
5494# Generators (may perhaps move later).
5495#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands, with the columns padded to
    fixed offsets.

    Returns the formatted line (string).
    """
    sMacro = 'OP';
    cMaxOperands = 3;
    if len(oInstr.aoOperands) > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    for iOperand, oOperand in enumerate(oInstr.aoOperands):
        sTmp += ' ' if iOperand == 0 else ',';
        if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
            sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
        else:
            sTmp += g_kdOpTypes[oOperand.sType][2];
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    #
    iStart = len(asColumns);
    if oInstr.sEncoding is None:
        pass;
    elif oInstr.sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding in [ 'prefix', ]:
        # One zero column per operand.
        for oOperand in oInstr.aoOperands:
            asColumns.append('0,');
    elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif oInstr.sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif oInstr.sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif oInstr.sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands.
    # (Operands already covered by a decoder column above are skipped.)
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        sIdx = g_kdOpTypes[oOperand.sType][0];
        #if sIdx != 'IDX_UseModRM':
        asColumns.append(sIdx + ',');
    # Pad the decoder columns up to cMaxOperands.
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iStart = len(asColumns)
    for oOperand in oInstr.aoOperands:
        asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
    # Pad the operand columns up to cMaxOperands.
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Flags.
    #
    # Only hints mapping to DISOPTYPE_XXX defines are included, OR'ed
    # together in sorted hint name order; zero if there are none.
    sTmp = '';
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            if sTmp:
                sTmp += ' | ' + sDefine;
            else:
                sTmp += sDefine;
    if sTmp:
        sTmp += '),';
    else:
        sTmp += '0),';
    asColumns.append(sTmp);

    #
    # Format the columns into a line.
    #
    # Each column is padded out to its offset in aoffColumns, or separated
    # from the previous one by a single space if that has been passed.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for i, s in enumerate(asColumns):
        if len(sLine) < aoffColumns[i]:
            sLine += ' ' * (aoffColumns[i] - len(sLine));
        else:
            sLine += ' ';
        sLine += s;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #    DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5620
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether leading and trailing None entries should be trimmed off.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For the full table that is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the end of the last used entry.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the first used entry.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Go for the short table version when the number of entries we can trim
    # off reaches a 30th of the table size (integer division).
    cTrimmable = iFirst + (cEntries - iEnd);
    if cTrimmable >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
5642
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction sources (parseAll), derives the list of
    disassembler maps from g_dInstructionMaps and writes one DISOPCODE table
    plus one DISOPMAPDESC range record per map to oDstFile.  At the moment
    only maps whose name starts with 'vex' are output.

    oDstFile is the file-like object to write the tables to (only its write()
    method is used); it defaults to sys.stdout.

    Returns exit code: 0 on success, 1 if parsing failed (the error and the
    traceback are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt: # Top-level boundary: report it and convert to an exit code.
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps using the 'byte+pfx' selector are copied once per prefix variant
    # (none, 0x66, 0xf3 and 0xf2) instead of being used as they are.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            for sSuffix, sPrefix in (('', 'none'), ('_66', '0x66'), ('_F3', '0xf3'), ('_F2', '0xf2')):
                aoDisasmMaps.append(oMap.copy(oMap.sName + sSuffix, sPrefix));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Header declarations; collected here, but not written out by this function.
    print("debug: maps=%s\n" % (', '.join(oCurMap.sName for oCurMap in aoDisasmMaps),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        sTableName      = oMap.getDisasTableName();
        sRangeName      = oMap.getDisasRangeName();
        cRowEntries     = 0x10 * cEntriesPerByte;       # Table entries covering 16 opcode byte values.
        # An empty range gets a single dummy entry below, so the emitted table never has zero elements.
        cTableEntries   = max(iInstrEnd - iInstrStart, 1);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, cTableEntries,));
            asLines.append('const DISOPCODE %s[%d] =' % (sTableName, cTableEntries,));
        asLines.append('{');

        # A short table starting in the middle of a row gets a comment giving the start index.
        if fShortTable and (iInstrStart & (cRowEntries - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start of a new row: blank separator line (not before the first one) and a row comment.
            if (iInstr & (cRowEntries - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cRowEntries - 1)) == 0 and cInvalidInstrs >= cRowEntries:
                    # A whole aligned row is invalid.  The final 'iInstr += 1' accounts for the last entry.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cRowEntries,));
                    iInstr += cRowEntries - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one aligned opcode byte are invalid.  Skip exactly the number of
                        # entries the block macro stands for (this was hard-coded to 3, i.e. only correct
                        # when there are four entries per byte).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list) and len(oInstr) != 0:
                # Several instructions share the slot; emit a placeholder listing them in a comment.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                               % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        # Note! The expected count includes the dummy entry of an otherwise empty table.
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The declaration must use the very same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines, followed by a blank line separating the maps.
        #
        oDstFile.write('\n'.join(asLines) + '\n\n');
    return 0;
5771
if __name__ == '__main__':
    # Run as a script: generate the disassembler tables (to stdout by default)
    # and hand the result back to the shell as the process exit code.
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
5774
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette