VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 99051

Last change on this file since 99051 was 98969, checked in by vboxsync, 21 months ago

VMM/IEM: More work on processing MC blocks, mainly related to reworking common functions for binary operations into body macros. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 250.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 98969 2023-03-15 00:24:47Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 98969 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
68g_kdX86EFlagsConstants = {
69 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
70 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
71 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
72 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
73 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
74 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
75 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
76 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
77 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
78 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
79 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
80 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
81 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
82 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
83 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
84 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
85 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
86 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
87 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
88 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
89};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to the name of an X86_EFL_XXX constant.
## A leading '!' on the value marks mnemonics denoting the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): x86 sets PF on even parity, so 'po' (PF set) and 'pe' (PF clear)
    #               look swapped relative to the usual debugger notation - confirm
    #               against the users of these mnemonics before changing anything.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
138## Constants and values for CR0.
139g_kdX86Cr0Constants = {
140 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
141 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
142 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
143 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
144 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
145 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
146 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
147 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
148 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
149 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
150 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
151};
152
153## Constants and values for CR4.
154g_kdX86Cr4Constants = {
155 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
156 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
157 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
158 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
159 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
160 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
161 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
162 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
163 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
164 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
165 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
166 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
167 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
168 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
169 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
170 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
171 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
172 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
173};
174
175## XSAVE components (XCR0).
176g_kdX86XSaveCConstants = {
177 'XSAVE_C_X87': 0x00000001,
178 'XSAVE_C_SSE': 0x00000002,
179 'XSAVE_C_YMM': 0x00000004,
180 'XSAVE_C_BNDREGS': 0x00000008,
181 'XSAVE_C_BNDCSR': 0x00000010,
182 'XSAVE_C_OPMASK': 0x00000020,
183 'XSAVE_C_ZMM_HI256': 0x00000040,
184 'XSAVE_C_ZMM_16HI': 0x00000080,
185 'XSAVE_C_PKRU': 0x00000200,
186 'XSAVE_C_LWP': 0x4000000000000000,
187 'XSAVE_C_X': 0x8000000000000000,
188 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
189 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
190};
191
192
193## \@op[1-4] locations
194g_kdOpLocations = {
195 'reg': [], ## modrm.reg
196 'rm': [], ## modrm.rm
197 'imm': [], ## immediate instruction data
198 'vvvv': [], ## VEX.vvvv
199
200 # fixed registers.
201 'AL': [],
202 'rAX': [],
203 'rDX': [],
204 'rSI': [],
205 'rDI': [],
206 'rFLAGS': [],
207 'CS': [],
208 'DS': [],
209 'ES': [],
210 'FS': [],
211 'GS': [],
212 'SS': [],
213};
214
215## \@op[1-4] types
216##
217## Value fields:
218## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
219## - 1: the location (g_kdOpLocations).
220## - 2: disassembler format string version of the type.
221## - 3: disassembler OP_PARAM_XXX (XXX only).
222## - 4: IEM form matching instruction.
223##
224## Note! See the A.2.1 in SDM vol 2 for the type names.
225g_kdOpTypes = {
226 # Fixed addresses
227 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),
228
229 # ModR/M.rm
230 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
231 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
232 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
233 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
234 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
235 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
236 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
237 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
238 'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
239 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
240 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
241 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
242 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
243 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
244 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
245 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
246 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
247 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
248 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
249 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
250 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
251 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
252 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
253 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
254 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
255 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
256
257 # ModR/M.rm - register only.
258 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
259 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
260 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
261 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
262 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
263 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
264 'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
265 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),
266
267 # ModR/M.rm - memory only.
268 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
269 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
270 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
271 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
272 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
273 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
274 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
275 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
276 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
277 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
278 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
279 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
280 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
281 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
282 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
283
284 # ModR/M.reg
285 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
286 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
287 'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
288 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
289 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
290 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
291 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
292 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
293 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
294 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
295 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
296 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
297 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
298 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
299 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
300 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
301 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
302 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
303 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
304 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
305 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
306 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
307 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
308 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
309 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
310 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
311 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
312 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
313 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
314 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
315 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
316 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
317
318 # VEX.vvvv
319 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
320 'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
321 'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
322 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
323 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
324 'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
325 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
326 'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),
327
328 # Immediate values.
329 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
330 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
331 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
332 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
333 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
334 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword
335
336 # Address operands (no ModR/M).
337 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
338 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),
339
340 # Relative jump targets
341 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
342 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),
343
344 # DS:rSI
345 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
346 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
347 # ES:rDI
348 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
349 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),
350
351 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),
352
353 # Fixed registers.
354 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
355 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
356 'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
357 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
358 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
359 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
360 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
361 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
362 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
363};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
369## IEMFORM_XXX mappings.
370g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
371 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
372 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
373 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
374 'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
375 'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
376 'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
377 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
378 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
379 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
380 'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
381 'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
382 'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
383 'M': ( 'ModR/M', [ 'rm', ], '', ),
384 'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
385 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
386 'R': ( 'ModR/M', [ 'reg', ], '', ),
387
388 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
389 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
390 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
391 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
392 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
393 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
394 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
395 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
396 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
397 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
398 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
399 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
400 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
401 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
402 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
403 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
404 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
405 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
406 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
407 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
408 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
409 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),
410
411 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
412 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
413 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
414
415 'FIXED': ( 'fixed', None, '', ),
416};
417
418## \@oppfx values.
419g_kdPrefixes = {
420 'none': [],
421 '0x66': [],
422 '0xf3': [],
423 '0xf2': [],
424};
425
426## Special \@opcode tag values.
427g_kdSpecialOpcodes = {
428 '/reg': [],
429 'mr/reg': [],
430 '11 /reg': [],
431 '!11 /reg': [],
432 '11 mr/reg': [],
433 '!11 mr/reg': [],
434};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Every non-alias key must resolve to itself in the first field, otherwise
## the tag is silently rewritten into a different sub-opcode condition.
g_kdSubOpcodes = {
    'none': [ None, '', ],
    '11 mr/reg': [ '11 mr/reg', '', ],
    '11': [ '11 mr/reg', '', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', '', ],
    '!11': [ '!11 mr/reg', '', ], ##< alias
    'rex.w=0': [ 'rex.w=0', 'WZ', ],
    'w=0': [ 'rex.w=0', '', ], ##< alias
    'rex.w=1': [ 'rex.w=1', 'WNZ', ],
    'w=1': [ 'rex.w=1', '', ], ##< alias
    'vex.l=0': [ 'vex.l=0', 'L0', ],
    'vex.l=1': [ 'vex.l=1', 'L1', ], # Was 'vex.l=0' (copy & paste slip), turning VEX.L=1 tags into VEX.L=0.
    '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ],
    '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ],
};
456
457## Valid values for \@openc
458g_kdEncodings = {
459 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
460 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
461 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
462 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
463 'prefix': [ None, ], ##< Prefix
464};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted style names; the values are unused empty lists.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
476g_kdCpuNames = {
477 '8086': (),
478 '80186': (),
479 '80286': (),
480 '80386': (),
481 '80486': (),
482};
483
## \@opcpuid
## Maps the lower case CPUID feature mnemonic to the name of the matching
## X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the constants of *other* leaf 1 ECX features
    # (DTES64, CPLDS and TM2 respectively), i.e. the wrong CPUID bits.
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to the name of a DISOPTYPE_XXX constant; hints without
## any such constant map to an empty string.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ## (only in 16 & 32 bits mode!)
    'avx': 'DISOPTYPE_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
570
571## \@opxcpttype values (see SDMv2 2.4, 2.7).
572g_kdXcptTypes = {
573 'none': [],
574 '1': [],
575 '2': [],
576 '3': [],
577 '4': [],
578 '4UA': [],
579 '5': [],
580 '5LZ': [], # LZ = VEX.L must be zero.
581 '6': [],
582 '7': [],
583 '7LZ': [],
584 '8': [],
585 '11': [],
586 '12': [],
587 'E1': [],
588 'E1NF': [],
589 'E2': [],
590 'E3': [],
591 'E3NF': [],
592 'E4': [],
593 'E4NF': [],
594 'E5': [],
595 'E5NF': [],
596 'E6': [],
597 'E6NF': [],
598 'E7NF': [],
599 'E9': [],
600 'E9NF': [],
601 'E10': [],
602 'E11': [],
603 'E12': [],
604 'E12NF': [],
605};
606
607
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hexadecimal digits (e.g. '0x0f').
    Returns True if it is, False if not.
    """
    sHexDigits = '0123456789abcdef';
    # Reject anything that isn't exactly four characters with a '0x' prefix.
    if len(sOpcode) != 4:
        return False;
    if sOpcode[:2] != '0x':
        return False;
    # Both nibbles must be lower case hex digits.
    if sOpcode[2] not in sHexDigits:
        return False;
    return sOpcode[3] in sHexDigits;
619
620
621class InstructionMap(object):
622 """
623 Instruction map.
624
625 The opcode map provides the lead opcode bytes (empty for the one byte
626 opcode map). An instruction can be member of multiple opcode maps as long
627 as it uses the same opcode value within the map (because of VEX).
628 """
629
630 kdEncodings = {
631 'legacy': [],
632 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
633 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
634 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
635 'xop8': [], ##< XOP prefix with vvvvv = 8
636 'xop9': [], ##< XOP prefix with vvvvv = 9
637 'xop10': [], ##< XOP prefix with vvvvv = 10
638 };
639 ## Selectors.
640 ## 1. The first value is the number of table entries required by a
641 ## decoder or disassembler for this type of selector.
642 ## 2. The second value is how many entries per opcode byte if applicable.
643 kdSelectors = {
644 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
645 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
646 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
647 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
648 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
649 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
650 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
651 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
652 };
653
654 ## Define the subentry number according to the Instruction::sPrefix
655 ## value for 'byte+pfx' selected tables.
656 kiPrefixOrder = {
657 'none': 0,
658 '0x66': 1,
659 '0xf3': 2,
660 '0xf2': 3,
661 };
662
663 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
664 sEncoding = 'legacy', sDisParse = None):
665 assert sSelector in self.kdSelectors;
666 assert sEncoding in self.kdEncodings;
667 if asLeadOpcodes is None:
668 asLeadOpcodes = [];
669 else:
670 for sOpcode in asLeadOpcodes:
671 assert _isValidOpcodeByte(sOpcode);
672 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
673
674 self.sName = sName;
675 self.sIemName = sIemName;
676 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
677 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
678 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
679 self.aoInstructions = [] # type: Instruction
680 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
681
682 def copy(self, sNewName, sPrefixFilter = None):
683 """
684 Copies the table with filtering instruction by sPrefix if not None.
685 """
686 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
687 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
688 else self.sSelector,
689 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
690 if sPrefixFilter is None:
691 oCopy.aoInstructions = list(self.aoInstructions);
692 else:
693 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
694 return oCopy;
695
696 def getTableSize(self):
697 """
698 Number of table entries. This corresponds directly to the selector.
699 """
700 return self.kdSelectors[self.sSelector][0];
701
702 def getEntriesPerByte(self):
703 """
704 Number of table entries per opcode bytes.
705
706 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
707 the others it will just return 1.
708 """
709 return self.kdSelectors[self.sSelector][1];
710
711 def getInstructionIndex(self, oInstr):
712 """
713 Returns the table index for the instruction.
714 """
715 bOpcode = oInstr.getOpcodeByte();
716
717 # The byte selectors are simple. We need a full opcode byte and need just return it.
718 if self.sSelector == 'byte':
719 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
720 return bOpcode;
721
722 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
723 if self.sSelector == 'byte+pfx':
724 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
725 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
726 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
727
728 # The other selectors needs masking and shifting.
729 if self.sSelector == '/r':
730 return (bOpcode >> 3) & 0x7;
731
732 if self.sSelector == 'mod /r':
733 return (bOpcode >> 3) & 0x1f;
734
735 if self.sSelector == 'memreg /r':
736 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
737
738 if self.sSelector == '!11 /r':
739 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
740 return (bOpcode >> 3) & 0x7;
741
742 if self.sSelector == '11 /r':
743 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
744 return (bOpcode >> 3) & 0x7;
745
746 if self.sSelector == '11':
747 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
748 return bOpcode & 0x3f;
749
750 assert False, self.sSelector;
751 return -1;
752
753 def getInstructionsInTableOrder(self):
754 """
755 Get instructions in table order.
756
757 Returns array of instructions. Normally there is exactly one
758 instruction per entry. However the entry could also be None if
759 not instruction was specified for that opcode value. Or there
760 could be a list of instructions to deal with special encodings
761 where for instance prefix (e.g. REX.W) encodes a different
762 instruction or different CPUs have different instructions or
763 prefixes in the same place.
764 """
765 # Start with empty table.
766 cTable = self.getTableSize();
767 aoTable = [None] * cTable;
768
769 # Insert the instructions.
770 for oInstr in self.aoInstructions:
771 if oInstr.sOpcode:
772 idxOpcode = self.getInstructionIndex(oInstr);
773 assert idxOpcode < cTable, str(idxOpcode);
774
775 oExisting = aoTable[idxOpcode];
776 if oExisting is None:
777 aoTable[idxOpcode] = oInstr;
778 elif not isinstance(oExisting, list):
779 aoTable[idxOpcode] = list([oExisting, oInstr]);
780 else:
781 oExisting.append(oInstr);
782
783 return aoTable;
784
785
def getDisasTableName(self):
    """
    Derives the disassembler table name from the map name (self.sName).

    The name is split on underscores and each part is appended to the
    'g_aDisas' prefix:
      - 'm' and 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and two
        character lowercase hex parts are appended with a leading underscore;
      - any other part gets 'grp' -> 'Grp' and 'map' -> 'Map' replaced and its
        first character uppercased.
    """
    asPieces = ['g_aDisas']
    for sPart in self.sName.split('_'):
        if sPart in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sPart)
        elif len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart):
            # Two digit hex byte, kept as-is.
            asPieces.append('_' + sPart)
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map')
            asPieces.append(sPart[0].upper() + sPart[1:])
    return ''.join(asPieces)
803
def getDisasRangeName(self):
    """
    Derives the disassembler table range name for this map.

    Takes getDisasTableName(), swaps the 'g_aDisas' prefix for 'g_Disas' and
    appends 'Range'.
    """
    sTableName = self.getDisasTableName()
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas')
    return sRangeBase + 'Range'
809
def isVexMap(self):
    """ Tells whether this is a VEX map, i.e. whether sEncoding begins with 'vex'. """
    sEncoding = self.sEncoding
    return sEncoding.startswith('vex')
813
814
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName
        # Normal sizes in bytes.  get() picks the first entry that fits and
        # treats the last entry as the largest one.
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes
        self.fUnsigned = fUnsigned

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage)
            self.sMessage = sMessage

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    }

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally starting with '+' or '-' (which forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.  The bytearray
        holds the value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value')

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned
        if sValue[0] in ('-', '+'):
            fSignExtend = True
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x'
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x'

        # Try convert it to an integer.  int() is arbitrary precision on
        # Python 3 and promotes to long by itself on Python 2, so there is no
        # need for the Python 2 only long() here.
        try:
            iValue = int(sValue, 16 if fHex else 10)
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt))

        # Convert to a hex digit string.  '%x' yields bare lowercase digits
        # on all Python versions (no '0x' prefix, no 'L' suffix).  Negative
        # values are converted to two's complement by inverting the digits
        # of (-n - 1), i.e. ~(-n - 1) == n.
        if iValue >= 0:
            sHex = '%x' % (iValue,)
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)])
            # Make sure the top bit of the leading digit is set.
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex

        # Figure the natural size: the first configured size that fits, or
        # else the next multiple of the largest configured size.
        cDigits    = len(sHex)
        cMaxDigits = self.acbSizes[-1] * 2
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2
                if cDigits <= cNaturalDigits:
                    break
        else:
            cNaturalDigits = (cDigits + cMaxDigits - 1) // cMaxDigits * cMaxDigits
        assert isinstance(cNaturalDigits, int)

        # Pad to the natural size, zero or sign extending as appropriate.
        if cNaturalDigits != cDigits:
            sHex = ('0' if iValue >= 0 else 'f') * (cNaturalDigits - cDigits) + sHex

        # Walk the digit pairs from the end, giving a little endian bytearray.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)])

        return ((fSignExtend, abValue),)

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue)
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage
        return True

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue
        return False
939
940
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics that are looked up in
    g_kdEFlagsMnemonics.  Mnemonics mapping to a '!' prefixed constant name
    request that the flag is cleared, all others that it is set.
    """

    ## Mnemonics that isAndOrPair() searches the value string for.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics to byte values.

        Returns the TestType.get() result for the mask of flags to set when no
        flag is to be cleared.  Otherwise returns a two entry tuple with the
        converted clear mask first and the converted set mask second.

        Raises BadValue on unknown mnemonics.
        """
        fSetMask   = 0
        fClearMask = 0
        for sMnemonic in sValue.split(','):
            sEflConst = g_kdEFlagsMnemonics.get(sMnemonic)
            if sEflConst is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sEflConst[0] != '!':
                fSetMask |= g_kdX86EFlagsConstants[sEflConst]
            else:
                # Strip the '!' to get at the real constant name.
                fClearMask |= g_kdX86EFlagsConstants[sEflConst[1:]]

        aoSetValue = TestType.get(self, '0x%x' % (fSetMask,))
        if fClearMask == 0:
            assert self.isAndOrPair(sValue) is False
            return aoSetValue

        aoClearValue = TestType.get(self, '%#x' % (fClearMask,))
        assert self.isAndOrPair(sValue) is True
        return (aoClearValue[0], aoSetValue[0])

    def isAndOrPair(self, sValue):
        """
        Returns True if any of the kdZeroValueFlags mnemonics occurs anywhere
        in sValue (plain substring search), otherwise False.
        """
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags)
976
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a dictionary of
    constants (CR0 and similar).

    Values are comma separated flag names.  Each name is uppercased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues; the values
    found are ORed together.
    """

    ## Not referenced by any method of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)
        self.sConstantPrefix      = sConstantPrefix
        self.kdConstantsAndValues = kdConstantsAndValues

    def get(self, sValue):
        """
        Converts a comma separated list of flag names into the TestType.get()
        representation of their combined value.

        Raises BadValue if a flag name isn't found in the dictionary.
        """
        fCombined = 0
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper()
            fBits = self.kdConstantsAndValues.get(sConstant)
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits
        return TestType.get(self, '0x%x' % (fCombined,))
997
998
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance simply records the field name, the assignment operator, the
    value string and the type name; nothing is converted here.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ]
    ## Types
    # NOTE(review): 'int' is constructed with TestType's default fUnsigned=True,
    #               so it currently behaves exactly like 'uint' - confirm
    #               whether fUnsigned = False was intended.
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
        'cr0':  TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':  TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept so existing users keep working.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one state modifier.

        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings.  These are checked with assertions only.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1264
1265
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet, for instance
    ('size', '==', 'o32').
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]

    ## Selector variables and their valid values.
    ## Each valid value maps to a 'variable_value' style name.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    }

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Records the selector triplet.

        Asserts that sVariable is a kdVariables key, that sOp is one of
        kasCompareOps and that sValue is valid for the variable.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        dValidValues = self.kdVariables[sVariable]
        assert sValue in dValidValues
        self.sVariable = sVariable
        self.sOp       = sOp
        self.sValue    = sValue
1362
1363
1364class InstructionTest(object):
1365 """
1366 Instruction test.
1367 """
1368
1369 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1370 self.oInstr = oInstr # type: InstructionTest
1371 self.aoInputs = [] # type: list(TestInOut)
1372 self.aoOutputs = [] # type: list(TestInOut)
1373 self.aoSelectors = [] # type: list(TestSelector)
1374
1375 def toString(self, fRepr = False):
1376 """
1377 Converts it to string representation.
1378 """
1379 asWords = [];
1380 if self.aoSelectors:
1381 for oSelector in self.aoSelectors:
1382 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1383 asWords.append('/');
1384
1385 for oModifier in self.aoInputs:
1386 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1387
1388 asWords.append('->');
1389
1390 for oModifier in self.aoOutputs:
1391 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1392
1393 if fRepr:
1394 return '<' + ' '.join(asWords) + '>';
1395 return ' '.join(asWords);
1396
1397 def __str__(self):
1398 """ Provide string represenation. """
1399 return self.toString(False);
1400
1401 def __repr__(self):
1402 """ Provide unambigious string representation. """
1403 return self.toString(True);
1404
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        # Both values must be known to the respective lookup tables.
        assert sWhere in g_kdOpLocations, sWhere
        assert sType in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. if the type starts with 'E', 'G' or 'M'. """
        chFirst = self.sType[0]
        return chFirst in ('E', 'G', 'M')
1419
1420
1421
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything recorded about one instruction: the core description
    (mnemonic, opcode, operands, flags, CPU requirements, tests, ...), the
    implementation details (statistics and function names, stub indicators),
    where it was found in the sources and some intermediate raw input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'name=value' pairs, leaving out entries whose
        value is None.  The result is wrapped in '<' and '>' when fRepr is
        True.
        """
        # Collect (name, value) pairs; optional ones are only added when set.
        atPairs = [('opcode', self.sOpcode)]
        if self.sPrefix:
            atPairs.append(('prefix', self.sPrefix))
        atPairs.append(('mnemonic', self.sMnemonic))
        atPairs.extend(('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                       for iOperand, oOperand in enumerate(self.aoOperands))
        if self.aoMaps:
            atPairs.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])))
        atPairs.append(('encoding', self.sEncoding))
        if self.dHints:
            atPairs.append(('hints', ','.join(self.dHints.keys())))
        atPairs.append(('disenum', self.sDisEnum))
        if self.asCpuIds:
            atPairs.append(('cpuid', ','.join(self.asCpuIds)))
        atPairs.append(('group', self.sGroup))
        if self.fUnused:
            atPairs.append(('unused', 'True'))
        if self.fInvalid:
            atPairs.append(('invalid', 'True'))
        atPairs.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ])
        if self.fStub:
            atPairs.append(('fStub', 'True'))
        if self.fUdStub:
            atPairs.append(('fUdStub', 'True'))
        if self.cOpTags:
            atPairs.append(('optags', str(self.cOpTags)))
        if self.iLineFnIemOpMacro != -1:
            atPairs.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)))
        if self.iLineMnemonicMacro != -1:
            atPairs.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)))

        # Format the ones that have a value.
        asParts = ['%s=%s' % (sField, oValue,) for sField, oValue in atPairs if oValue is not None]
        sBody   = '; '.join(asParts)
        if fRepr:
            return '<' + sBody + '>'
        return sBody

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        Arguments that are given (i.e. evaluate as true) replace the map list,
        opcode, sub-opcode and prefix of the copy respectively.  The copy gets
        this object as oParent.  List and dictionary attributes are copied
        shallowly; the flag lists (asFlXxx) and oCpuExpr are shared with the
        original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated)
        oCopy.oParent = self

        # The bits the caller may override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps)
        oCopy.sPrefix    = sPrefix if sPrefix else self.sPrefix
        oCopy.sOpcode    = sOpcode if sOpcode else self.sOpcode
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode

        # Attributes handed over as they are.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                      'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr))

        # Lists get a new list object holding the same elements. (Deeper copy for aoOperands/aoTests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)))

        oCopy.dHints = dict(self.dHints)

        return oCopy

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms: '0xNN' (full hex byte), '/r', '11/r' and '!11/r',
        where r is a single digit that ends up shifted left by three bits.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode) # pylint type confusion workaround.
        cchOpcode = len(sOpcode)

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16)

        # The /r form:
        if cchOpcode == 2 and sOpcode.startswith('/') and sOpcode[1].isdigit():
            return int(sOpcode[1:]) << 3

        # The 11/r form:
        if cchOpcode == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return 0xc0 | (int(sOpcode[-1:]) << 3)

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if cchOpcode == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return 0x80 | (int(sOpcode[-1:]) << 3)

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name,
        which must not be of the '!' prefixed kind, and the corresponding
        g_kdX86EFlagsConstants values are ORed together.  None and empty lists
        give 0.
        """
        uMask = 0
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag]
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant]
        return uMask

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps)
1660
1661
1662
1663## All the instructions.
1664g_aoAllInstructions = [] # type: list(Instruction)
1665
1666## All the instructions indexed by statistics name (opstat).
1667g_dAllInstructionsByStat = {} # type: dict(Instruction)
1668
1669## All the instructions indexed by function name (opfunction).
1670g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1671
1672## Instructions tagged by oponlytest
1673g_aoOnlyTestInstructions = [] # type: list(Instruction)
1674
1675## Instruction maps.
1676g_aoInstructionMaps = [
1677 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1678 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1679 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1680 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1681 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1682 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1683 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1684 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1685 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1686 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1687 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1688 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1689 ## @todo g_apfnEscF1_E0toFF
1690 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1691 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1692 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1693 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1694 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1695 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1696 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1698
1699 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1700 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1701 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1702 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1703 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1704 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1705 ## @todo What about g_apfnGroup9MemReg?
1706 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1707 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1708 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1709 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1710 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1711 ## @todo What about g_apfnGroup15RegReg?
1712 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1713 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1714 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1715
1716 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1717 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1718
1719 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1720 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1721 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1722 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1724 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1725
1726 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1727 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1728
1729 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1730 InstructionMap('xopmap8', sEncoding = 'xop8'),
1731 InstructionMap('xopmap9', sEncoding = 'xop9'),
1732 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1733 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1734 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1735 InstructionMap('xopmap10', sEncoding = 'xop10'),
1736 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737];
# Lookup dictionaries built from g_aoInstructionMaps: the first is keyed by each
# map's sName attribute, the second by its sIemName attribute.  Should several
# maps share a key value, the one listed last in g_aoInstructionMaps wins.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1740
1741
1742#
1743# "Microcode" statements and blocks
1744#
1745
class McStmt(object):
    """
    One statement of a microcode block.

    A statement is a name (sName) plus a list of parameter strings (asParams).
    The oUser attribute starts out as None and is not touched by this class.
    """
    def __init__(self, sName, asParams):
        self.oUser    = None;
        self.asParams = asParams;
        self.sName    = sName;          ##< 'IEM_MC_XXX' or 'C++'.

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as 'NAME(param1, param2);' followed by a newline,
        indented by cchIndent spaces.
        """
        sParams = ', '.join(self.asParams);
        return ''.join([' ' * cchIndent, self.sName, '(', sParams, ');\n',]);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders each statement in aoStmts at the given indent and returns the
        concatenated result.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in aoStmts whose name is a key in dNames,
        descending into the if-branch and then the else-branch of conditional
        statements.  Returns None if there is no such statement.

        Note! dNames is a dictionary purely for quick lookup, the values are
              never looked at.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            # Conditional: look in the if-branch first, then the else-branch.
            oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                    or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
            if oHit:
                return oHit;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++'. """
        return self.sName[:len('C++')] == 'C++';
1790
class McStmtCond(McStmt):
    """
    Conditional statement base class (IEM_MC_IF_XXX).

    Holds two statement lists: aoIfBranch and aoElseBranch, both empty until
    someone fills them in.
    """
    def __init__(self, sName, asParams):
        McStmt.__init__(self, sName, asParams);
        self.aoElseBranch = [];
        self.aoIfBranch   = [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional as 'NAME(params) { ... } IEM_MC_ENDIF();', with
        an '} IEM_MC_ELSE() {' section in between when the else-branch is not
        empty.  Branch statements are indented four more spaces.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1808
class McStmtVar(McStmt):
    """
    Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST.

    Adds the variable type (sType), the variable name (sVarName) and the
    constant value (sConstValue) to the plain statement.
    """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        self.sConstValue = sConstValue;     ##< None if not const.
        self.sVarName    = sVarName;
        self.sType       = sType;
1816
class McStmtArg(McStmtVar):
    """
    Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST,
    IEM_MC_ARG_LOCAL_REF.

    Adds the argument number (iArg) and the reference information (sRef and
    sRefType) to the variable statement.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # Only these two reference kinds are accepted.
        assert sRefType in ('none', 'local');
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        self.iArg     = iArg;
1825
1826
class McStmtCall(McStmt):
    """
    Call statement: IEM_MC_CALL_*.

    iFnParam is the index into asParams of the function being called; the
    parameter following it is recorded as idxParams.  iRcNameParam, when not
    negative, is the index into asParams whose value is stored in iRcName.
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;
        self.idxParams = iFnParam + 1;
        self.sFn       = asParams[iFnParam];
        if iRcNameParam < 0:
            self.iRcName = None;
        else:
            self.iRcName = asParams[iRcNameParam];
1835
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is kept as the one and only entry of asParams.  fDecode is
    stored as given and only selects the annotation used when rendering.
    """
    def __init__(self, sCode, fDecode, sName = 'C++'):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code indented by cchIndent spaces, with every newline
        (including the one appended here) preceded by a '// C++ decode' or
        '// C++ normal' annotation depending on fDecode.
        """
        sAnnotatedEol = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * cchIndent + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotatedEol);
1851
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is always 'C++/if' and the condition expression is the
    one and only entry of asParams.
    """
    def __init__(self, sCode, fDecode):
        McStmtCond.__init__(self, 'C++/if', [sCode,]);
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders 'if (<cond>) { ... }' and, when the else-branch is not empty,
        'else { ... }'.  The 'if' and 'else' lines get a '// C++ decode' or
        '// C++ normal' annotation depending on fDecode, the braces go on
        lines of their own and branch statements are indented four more spaces.
        """
        sIndent = ' ' * cchIndent;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
1872
class McCppPreProc(McCppGeneric):
    """
    C++/C preprocessor directive.

    Stored like a generic C++ statement with fDecode set to False and the
    statement name 'C++/preproc'.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Returns the directive followed by a newline.  The indent is ignored, so
        the directive always starts in the first column.
        """
        _ = cchIndent;
        return ''.join((self.asParams[0], '\n',));
1882
1883
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    An instance records where the block was found (source file, lines, offsets,
    function), the raw source lines it is made up of (set by complete()) and,
    after decode() has been called, the list of decoded statements.
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, sFunction, iInFunction, cchIndent = None):
        self.sSrcFile     = sSrcFile;       ##< The source file containing the block.
        self.iBeginLine   = iBeginLine;     ##< The line with the IEM_MC_BEGIN statement.
        self.offBeginLine = offBeginLine;   ##< The offset of the IEM_MC_BEGIN statement within the line.
        self.iEndLine     = -1;             ##< The line with the IEM_MC_END statement.
        self.offEndLine   = 0;              ##< The offset of the IEM_MC_END statement within the line.
        self.sFunction    = sFunction;      ##< The function the block resides in.
        self.iInFunction  = iInFunction;    ##< The block number within the function.
        ## The indent; falls back on offBeginLine when cchIndent is None (or zero).
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        self.asLines      = []              # type: list(str) ##< The raw lines the block is made up of.
        ## Decoded statements in the block.
        self.aoStmts      = []              # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block by recording the end position and the raw
        lines.  May only be called once per block (asserted).
        """
        assert self.iEndLine == -1;
        self.iEndLine   = iEndLine;
        self.offEndLine = offEndLine;
        self.asLines    = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error (ParserException).

        The line and column in the message are worked out from the offset off
        into sRawCode, the line being relative to self.iBeginLine.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (ParserException) located at the start of the block. """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising ParserException if it doesn't match.
        Returns True otherwise.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parsers.
    #
    # These are static methods taking the block as the first parameter (oSelf),
    # which is how decodeCode invokes the entries of g_dMcStmtParsers.  They
    # return a statement object or a tuple of statement objects.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN - checks for two parameters, returns a plain McStmt. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: pointer argument name, local name,
        argument number.

        Returns a tuple with two statements, see note in the code.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
        #       The parameter list of the latter is given in the order that
        #       parseMcArgLocalRef expects, i.e. type, name, local, argument
        #       number, so that the statement renders correctly.
        return (
            McStmtVar('IEM_MC_LOCAL_VAR', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AIMPL_3|4

        The argument count is the last digit of the statement name.  The first
        parameter is the return code variable, the second the function.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_VOID_AIMPL_N - the function followed by N arguments (N = last digit of the name). """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AVX_AIMPL_2|3 - the function followed by the arguments. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_FPU_AIMPL_1|2|3 - the function followed by the arguments. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_MMX_AIMPL_2|3 - the function followed by the arguments. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_SSE_AIMPL_2|3 - the function followed by the arguments. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 - the function followed by the arguments. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    #
    # Low level parsing helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with comments removed.

        C++ comments are removed up to, but not including, the newline.  C
        comments are replaced by a single space unless there already is
        whitespace on either side of them.  An unterminated comment causes the
        remainder of the code to be dropped and the result right-stripped.

        Note! Does not know about string literals.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;   # Skip the newline we kept.

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.
        Returns stripped value and the end offset of the terminating ',' or ')'.

        The end offset equals len(sCode) if no terminator was found.  Only
        parentheses are taken into account when tracking nesting.
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.
        Returns the list of parameter values and the offset of the closing parentheses.
        Returns (None, len(sCode)) if no closing parentheses was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                asParams.append(sParam);
                if off >= len(sCode):
                    # Ran off the end without finding the closing parenthesis,
                    # report it the documented way instead of asserting.
                    break;
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode.
        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off   += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Account for any braces opened before this closing one.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode, not going beyond offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character (offStop at most). """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the start of a C/C++ control statement (if, else, while, for, do).
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to a set of 'iem.s.' members; not used by the code in this class.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  iLevel is the nesting
        depth of conditionals and is incremented for each recursive call made
        to decode a branch.

        Note! The searches for statement terminators are not limited by
              offStop; the one-line if/else handling relies on this as it
              passes the offset of the terminating ';' as offStop.

        Returns list of McStmt instances.
        Raises ParserException on failure.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                ch = sRawCode[off + 1 : off + 2];   # Slicing yields '' rather than IndexError for a trailing '/'.
                if ch == '/':   # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace preceding the '{', if any.  (Must not step
                    # back unconditionally or the ')' is lost when it is directly followed by '{'.)
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive, ends with the line.
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;

    def decode(self):
        """
        Decodes the block, populating self.aoStmts.
        Returns the statement list.
        Raises ParserException on failure.
        """
        self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;
2405
2406
2407## IEM_MC_XXX -> parser dictionary.
2408# The raw table was generated via the following command
2409# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2410# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2411g_dMcStmtParsers = {
2412 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2413 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2414 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2415 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2416 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2417 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2418 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2419 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2420 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2421 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2422 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2423 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2424 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2425 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2426 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2427 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2428 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2429 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2430 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2431 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2432 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2433 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2434 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2435 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2436 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2437 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2438 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2439 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2440 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2441 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2442 'IEM_MC_ARG': McBlock.parseMcArg,
2443 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2444 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2445 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2446 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2447 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2448 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2449 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2450 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2451 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2452 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2453 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2454 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2455 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2456 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2457 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2458 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2459 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2460 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2461 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2462 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2463 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2464 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2465 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2466 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2467 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2468 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2469 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2470 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2471 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2472 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2473 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2474 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2475 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2476 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2477 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2478 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2479 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2480 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2481 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2482 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2483 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2484 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2485 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2486 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2487 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2488 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2489 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2490 'IEM_MC_END': McBlock.parseMcGeneric,
2491 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2492 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2493 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2494 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2495 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2496 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2497 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2498 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2499 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2500 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2501 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2502 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2503 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2504 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2505 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2506 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2507 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2508 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2509 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2510 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2511 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2512 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2513 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2514 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2515 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2516 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2517 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2518 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2519 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2520 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2521 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2522 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2523 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2557 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2558 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2559 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2560 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2561 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2562 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2563 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2564 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2565 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2566 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2567 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2568 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2569 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2570 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2571 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2572 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2573 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2574 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2575 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2576 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2577 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2578 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2579 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2580 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2581 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2582 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2583 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2584 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2585 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2586 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2587 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2588 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2589 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2590 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2591 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2592 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2593 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2594 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2595 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2596 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2597 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2598 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2599 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2600 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2601 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2602 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2603 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2604 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2605 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2606 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2607 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2608 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2609 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2610 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2611 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2612 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2613 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2614 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2615 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2616 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2617 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2618 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2619 'IEM_MC_MAYBE_RAISE_AESNI_RELATED_XCPT': McBlock.parseMcGeneric,
2620 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2621 'IEM_MC_MAYBE_RAISE_AVX2_RELATED_XCPT': McBlock.parseMcGeneric,
2622 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2623 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2624 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2625 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2626 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT': McBlock.parseMcGeneric,
2627 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_EX': McBlock.parseMcGeneric,
2628 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2629 'IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT': McBlock.parseMcGeneric,
2630 'IEM_MC_MAYBE_RAISE_SHA_RELATED_XCPT': McBlock.parseMcGeneric,
2631 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2632 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2633 'IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT': McBlock.parseMcGeneric,
2634 'IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2635 'IEM_MC_MAYBE_RAISE_SSE41_RELATED_XCPT': McBlock.parseMcGeneric,
2636 'IEM_MC_MAYBE_RAISE_SSE42_RELATED_XCPT': McBlock.parseMcGeneric,
2637 'IEM_MC_MAYBE_RAISE_SSSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2638 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2639 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2640 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2641 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2642 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2643 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2644 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2645 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2646 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2647 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2648 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2649 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2650 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2651 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2652 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2653 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2654 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2655 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2656 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2657 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2658 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2659 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2660 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2661 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2662 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2663 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2664 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2665 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2666 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2667 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2668 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2669 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2670 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2671 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2672 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2673 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2674 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2675 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2676 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2677 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2678 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2679 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2680 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2681 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2682 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2683 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2684 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2685 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2686 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2687 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2688 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2689 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2690 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2691 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2692 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2693 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2694 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2695 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2696 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2697 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2698 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2699 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2700 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2701 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2702 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2703 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2704 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2705 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2706 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2707 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2708 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2709 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2710 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2711 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2712 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2713 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2714 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2715 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2716 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2717 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2718 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2719 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2720 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2721 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2722 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2723 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2724 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2725 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2726 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2727 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2728 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2729 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2730 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2731 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2732 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2733 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2734 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2735 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2736 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2737 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2738 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2739 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2740 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2741 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2742 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2743 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2744 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2745 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2746 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2747 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2748 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2758 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2759 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2760 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2761 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2762 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2763 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2764 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2765 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2766 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2767 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2768 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2769 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2770 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2771 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2772 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2773 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2774 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2775 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2776 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2777 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2778 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2779 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2780 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2781 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2782 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2783 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2784 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2785 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2786 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2787};
2788
## List of microcode blocks (McBlock instances); starts out empty.
g_aoMcBlocks = [] # type: list(McBlock)
2791
2792
2793
class ParserException(Exception):
    """
    Exception type used for reporting parser errors.

    Carries a single, already formatted, message string.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2798
2799
2800class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2801 """
2802 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2803 """
2804
2805 ## @name Parser state.
2806 ## @{
2807 kiCode = 0;
2808 kiCommentMulti = 1;
2809 ## @}
2810
class Macro(object):
    """
    A preprocessor macro definition.

    Simple (object-like) macros have asArgs set to None (or empty) and
    expand to the body as-is.  Function-like macros get their parameters
    substituted by expandMacro(), including parameters glued to other
    tokens by the '##' concatenation operator.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded macro body.
        self.iLine  = iLine;    ##< Line number supplied by the creator.
        ## Matches any parameter name in the body together with adjacent '##'
        ## operators (groups 1 and 3) so these get dropped when substituting.
        ## None for macros without parameters.  The names are escaped so that
        ## they are always taken literally by the regular expression.
        self.oReArgMatch = None;
        if asArgs:
            sNames = '|'.join([re.escape(sArg) for sArg in asArgs]);
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + sNames + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """
        This is just to make the expanded output a bit prettier.

        Returns True if ch is a whitespace character.
        """
        # NOTE(review): The '(' test can never fail for a whitespace character,
        #               it is kept as-is to leave the padding behaviour untouched.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must have one entry per macro parameter
        (nothing for simple macros).  Returns the expanded body string.

        The body is scanned exactly once, left to right, so substituted
        argument text is never rescanned for parameter names and word
        boundaries are always judged against the original body.  The latter
        is what makes two parameters joined by '##' (like 'a##b') both expand;
        rescanning the modified body loses the boundary in front of the
        second parameter once the '##' has been consumed.
        """
        _ = oParent;
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs   = dict(zip(self.asArgs, asArgs));
        sBody   = self.sBody;
        cchBody = len(sBody);

        asParts = [];
        offPrev = 0;
        for oMatch in self.oReArgMatch.finditer(sBody):
            offStart = oMatch.start();
            offEnd   = oMatch.end();

            # Text between the previous parameter and this one goes unmodified.
            asParts.append(sBody[offPrev:offStart]);

            # Optional padding before the value (only when no '##' was involved).
            if not oMatch.group(1) and offStart > 0 and self._needSpace(sBody[offStart]):
                asParts.append(' ');

            asParts.append(dArgs[oMatch.group(2)]);

            # Optional padding after the value (only when no '##' was involved).
            if not oMatch.group(3) and offEnd < cchBody and self._needSpace(sBody[offEnd]):
                asParts.append(' ');

            offPrev = offEnd;

        asParts.append(sBody[offPrev:]);
        return ''.join(asParts);
2852
2853
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile            - Source file name, used as prefix in error messages.
    asLines             - The lines to parse.
    sDefaultMap         - Name of the default instruction map.  Must be a
                          key in g_dInstructionMaps.
    oInheritMacrosFrom  - Optional parser to take the initial macros from
                          (the dictionary is copied, the regexp is shared).
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.sCurFunction   = None  # type: str
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re      ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Note! Raw strings are used for all patterns containing backslashes so
    #       that they are not subject to python string escape processing
    #       (unknown escapes like '\s' trigger warnings in newer pythons).
    self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');

    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Maps each @op* tag to the method handling it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags: @optest0..@optest47 and @optest[0]..@optest[47].
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    ## Error messages queued by error(), errorOnLine() and errorComment().
    self.asErrors = [];
2939
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current line.

    The message is prefixed with the source file name and self.iLine.
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
2945
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
2951
def error(self, sMessage):
    """
    Queues an error message for the current line (self.iLine).

    Nothing is raised, the message is just added to self.asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2959
def errorOnLine(self, iLine, sMessage):
    """
    Queues an error message for the given line number.

    Nothing is raised, the message is just added to self.asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2967
def errorComment(self, iLineInComment, sMessage):
    """
    Queues an error message for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.
    Nothing is raised, the message is just added to self.asErrors.
    Returns False.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2975
def printErrors(self):
    """
    Writes all the queued error messages to stderr.

    Returns the number of queued errors (zero if none).
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
2984
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
2991
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by
    a single space.

    Queues an error if a comment start or end marker remains afterwards,
    as that indicates a multi-line comment.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3002
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line starting the table (the one with the table name).

    Returns True if the whole table was parsed, False (after queueing an
    error) on missing opening brace, wrong table length or end of file.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Going backwards so inserting and deleting entries does not
        # disturb the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Use the first instruction that either matches the opcode
                    # and prefix or has them unset (filling them in).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None suitable, so add a copy of the first one for this table entry.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries count too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3108
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global instruction
    list and the list of current instructions.

    The instruction is created for the given line, defaulting to the
    current one (self.iLine) when iLine is None.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3117
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    first underscore separated word (lower cased) becomes the mnemonic and
    the remaining words are added as operands, provided the instruction
    has no operands yet.

    Returns False if an unknown operand type is encountered (operands added
    before that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3133
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks the instruction as completed on iLine, fills in the attributes
    that are still missing (mnemonic, disassembler enum, stats name,
    function name, maps and encoding) and registers the instruction with
    its maps and the stats and function indexed dictionaries.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing,
    # falling back on the function name when there is no stats name.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or
    # failing that from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asWords = [oInstr.sMnemonic,] + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sStats = '_'.join(asWords);

    # Derive the IEM function name from mnemonic and operand types, or
    # failing that from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asWords = [oInstr.sMnemonic,] + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sFunction = 'iemOp_' + '_'.join(asWords);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            sBaseEnc = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
            oInstr.sEncoding = ('VEX.' + sBaseEnc) if oInstr.onlyInVexMaps() else sBaseEnc;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3239
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the instructions currently being parsed.

    Every entry in self.aoCurInstrs is handed to doneInstructionOne together
    with the completion line, which is self.iLine when iLineInComment is
    None and self.iCommentLine + iLineInComment otherwise.  The stub and
    instruction totals are updated, the pending comment and the current
    instruction list are reset, and if fEndOfFunction is set the current
    function name and its MC block counter are cleared too.

    Always returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iLineDone);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the per-instruction parsing state.
    self.aoCurInstrs = [];
    self.sComment = '';
    if fEndOfFunction:
        self.iMcBlockInFunc = 0;
        self.sCurFunction = None;
    return True;
3258
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to the attribute sAttrib of every current instruction.

    Unless fOverwrite is exactly True, an instruction whose attribute
    already holds something other than None is left untouched.
    """
    fForce = fOverwrite is True;
    for oCurInstr in self.aoCurInstrs:
        if fForce or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
3270
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib on every
    current instruction.

    The list is padded in place with None entries until iEntry is a valid
    index.  Unless fOverwrite is exactly True, an entry that already holds
    something other than None is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
3282
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line consists of the word 'Opcode' followed by a
    word starting with '0x' or '0X', the words after 'Opcode' are joined by
    single spaces (with any leading '0X' normalized to '0x') and passed to
    setInstrunctionAttrib as the 'sRawOldOpcodes' value.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize upper case hex prefixes.
        asOpcodes = [ '0x' + sWord[2:] if sWord.startswith('0X') else sWord for sWord in asWords[1:] ];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3298
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there are no current instructions, one is added via addInstruction
    for line self.iCommentLine + iTagLine.  The op-tag counter of every
    current instruction is incremented, and self.cTotalTagged is bumped for
    each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1;
        self.cTotalTagged += 1 if oCurInstr.cOpTags == 1 else 0;
    return self.aoCurInstrs[-1];
3308
3309 @staticmethod
def flattenSections(aasSections):
    """
    Turns each non-empty section (a list of lines) into one string made of
    its stripped lines joined by single spaces.  Empty sections are dropped.

    Returns the list of section strings.
    """
    return [ ' '.join(sLine.strip() for sLine in asSection) for asSection in aasSections if asSection ];
3320
3321 @staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens all the sections into a single string.

    The stripped lines of a section are joined using sLineSep.  Each
    non-empty section other than the one at index zero is preceded by
    sSectionSep.  Empty sections contribute nothing.
    """
    # The typical case is a single section holding a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asSection in enumerate(aasSections):
        if not asSection:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asSection]));
    return ''.join(asPieces);
3338
3339
3340
3341 ## @name Tag parsers
3342 ## @{
3343
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be non-empty, at most 180 characters (after a trailing
    dot has been added if missing) and consist of a single sentence.  On
    success the brief is stored in the instruction's sBrief attribute and
    True is returned.  On failure, including an attempt to replace an
    already set brief, the result of self.errorComment is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot that is followed by a space or that ends the
    # string.  Dots followed by anything else (e.g. in "0.5") are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3373
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to the asDescSections list of the instruction.

    Always returns True.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oTarget.asDescSections.extend(asFlattened);

    _ = sTag; _ = iEndLine;
    return True;
3388
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The value is checked against self.oReMnemonic and stored in the
    instruction's sMnemonic attribute, which must not already be set.
    Returns True on success, otherwise the result of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oTarget.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));
    oTarget.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
3411
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations; when
    omitted it is taken from entry [1] of the type's g_kdOpTypes value.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is derived from the last character of the tag.
    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3461
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The value is split on commas; whitespace around the names is ignored
    and so are empty entries (such as those produced by joining lines that
    already end with a comma).  Every name must be a key of
    g_dInstructionMaps and must not already be assigned to the instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
3496
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' as is.
    Anything else gets '0x' prepended when it is exactly two characters
    long and must then pass _isValidOpcodeByte.  Values other than None
    must be in g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        # Accept a bare two digit hex value by adding the '0x' prefix.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3534
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted values are anything passing _isValidOpcodeByte, or one of the
    forms '/N', '11/N' and '!11/N' where N is a single digit in the range
    0 thru 7 (the ModR/M reg field is three bits wide).

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sLead in ('/', '11/', '!11/'):
            if    len(sOpcode) == len(sLead) + 1 \
              and sOpcode.startswith(sLead) \
              and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3564
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; entry [0] of the associated
    value is what gets stored in the instruction's sSubOpcode attribute.
    Returns True on success, otherwise the result of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to replace an existing sub-opcode.
    sOldSubOpcode = oTarget.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oTarget.sSubOpcode = sNewSubOpcode;

    _ = iEndLine;
    return True;
3591
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  It is stored in the
    instruction's sEncoding attribute, which must not already be set.
    Returns True on success, otherwise the result of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if     sNewEncoding not in g_kdEncodings \
       and sNewEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an existing encoding.
    sOldEncoding = oTarget.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oTarget.sEncoding = sNewEncoding;

    _ = iEndLine;
    return True;
3618
## Maps each EFLAGS related tag to the name of the Instruction attribute
# that receives the flag list parsed from it (see parseTagOpEFlags).
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3627
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flag names, each of which must
    be a key of g_kdEFlagsMnemonics (surrounding whitespace is tolerated),
    or the single word 'none' (any case) for an empty list.  The list is
    stored in the instruction attribute that self.kdOpFlagToAttr gives for
    the tag; that attribute must still be None.

    Returns True on success.  Returns False if any flag was invalid and the
    result of self.errorComment on an overwrite attempt.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fOkay:
            return False;

    # Store the list, refusing to replace an earlier one.
    asPrevious = getattr(oTarget, self.kdOpFlagToAttr[sTag]);
    if asPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asFlags,));
    setattr(oTarget, self.kdOpFlagToAttr[sTag], asFlags);

    _ = iEndLine;
    return True;
3659
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    stands for an empty list.  Valid hints are added as keys to the
    instruction's dHints dictionary; a hint that is already present is
    reported via self.errorComment without failing the tag.

    Returns True on success and False if any hint was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (split() leaves no surrounding whitespace on the words, so no stripping is needed.)
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3693
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The first word of the value is used; it must match self.oReDisEnum and
    the instruction's sDisEnum attribute must not already be set.  Extra
    words are reported via self.errorComment but do not stop processing.

    Returns True on success, False when the value is empty, and the result
    of self.errorComment for an invalid name or an overwrite attempt.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
    if not asValues:
        return False;
    sNewDisEnum = asValues[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an existing value.
    sOldDisEnum = oTarget.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oTarget.sDisEnum = sNewDisEnum;

    _ = iEndLine;
    return True;
3722
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value is used and must be in g_kdCpuNames; extra
    words are reported via self.errorComment.  The name is stored in the
    instruction's sMinCpu attribute if that is still None.  A different
    value than the one already stored is reported via self.errorComment
    and the stored value is kept.

    Returns True on success, otherwise (empty or invalid value) the result
    of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Must not be done before this point, or the overwrite check is pointless.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3752
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list whose entries must be keys
    of g_kdCpuIdFlags; the single word 'none' (any case) stands for an
    empty list.  Valid entries are appended to the instruction's asCpuIds
    list; an entry that is already present is reported via
    self.errorComment without failing the tag.

    Returns True on success and False if any entry was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (split() leaves no surrounding whitespace on the words, so no stripping is needed.)
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3786
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName, and the
    instruction's sGroup attribute must not already be set.
    Returns True on success, otherwise the result of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    (sNewGroup,) = asNames;
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing group.
    sOldGroup = oTarget.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oTarget.sGroup = sNewGroup;

    _ = iEndLine;
    return True;
3812
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be exactly one word that is a key of g_kdInvalidStyles,
    and the instruction's sInvalidStyle attribute must not already be set.
    Besides storing the style, '@opunused' sets fUnused and '@opinvalid'
    sets fInvalid on the instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an existing style.
    sOldStyle = oTarget.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oTarget.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    if sTag == '@opunused':
        oTarget.fUnused = True;
    elif sTag == '@opinvalid':
        oTarget.fInvalid = True;

    _ = iEndLine;
    return True;
3850
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  A test that
    parses cleanly is appended to the instruction's aoTests list; problems
    are reported via self.errorComment and the test is dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are walked backwards: outputs first, then inputs (after
        # '->'), then selectors (after '/').  The current target list must be
        # compared by identity, as two of the lists may well hold equal content.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix, otherwise the field default is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
3995
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is expected to equal the count of tests the instruction
    already has; a mismatch is reported via self.errorComment.  Either way
    the tag is then handed on to parseTagOpTest, whose result is returned.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Pick the digits out of the tag, minding the optional brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oTarget.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oTarget.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4011
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or stored.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4023
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the instruction's asCopyTests list.  Invalid and
    duplicate references are reported via self.errorComment and skipped.

    Returns True, except for an empty value where the result of
    self.errorComment is returned.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Only the references are recorded here, so that forward references
    # can be resolved once all the input has been parsed.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));
    for sReference in asReferences:
        if sReference in oTarget.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
        elif self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oTarget.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4052
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    The instruction is added to g_aoOnlyTestInstructions unless already in
    there.  Returns True on success; a non-empty value yields the result
    of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction, once.
    fAlreadyListed = oTarget in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oTarget);

    _ = iEndLine;
    return True;
4075
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word that is a key of g_kdXcptTypes, and
    the instruction's sXcptType attribute must not already be set.
    Returns True on success, otherwise the result of self.errorComment.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid type.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));
    (sNewType,) = asValues;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an existing type.
    sOldType = oTarget.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));
    oTarget.sXcptType = sNewType;

    _ = iEndLine;
    return True;
4102
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name of the current instruction.  The
    name is normally picked up from the FNIEMOP_XXX macro invocation that
    follows the description, or generated from the mnemonic and operands.

    The tag is thought to be useful for instructions whose implementation
    does not follow immediately or which are not implemented yet.

    The value must match self.oReFunctionName and the instruction must not
    already have a function name.  Either problem is reported through
    self.errorComment(), whose result is returned.  Returns True on success.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened tag value is the candidate name.
    sCandidate = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sCandidate) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sCandidate, self.oReFunctionName.pattern));

    # Only the first name sticks, a second one is an error.
    sPrevious = oInstr.sFunction;
    if sPrevious is None:
        oInstr.sFunction = sCandidate;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, sPrevious, sCandidate,));
4130
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the current instruction.  The
    name is normally picked up from the IEMOP_MNEMONIC macro invocation, or
    generated from the mnemonic and operands.

    The tag is thought to be useful for instructions whose implementation
    does not follow immediately or which are not implemented yet.

    The value must match self.oReStatsName and the instruction must not
    already have a statistics name.  Either problem is reported through
    self.errorComment(), whose result is returned.  Returns True on success.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened tag value is the candidate name.
    sCandidate = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sCandidate) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sCandidate, self.oReStatsName.pattern));

    # Only the first name sticks, a second one is an error.
    sPrevious = oInstr.sStats;
    if sPrevious is None:
        oInstr.sStats = sCandidate;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, sPrevious, sCandidate,));
4158
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() when the tag is followed by a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4171
4172 ## @}
4173
4174
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, which are then grouped by the '@tag'
    that introduces them.  The text belonging to a tag is kept as a list of
    sections (lists of lines), where an empty line starts a new section.
    Text preceding the first tag is filed under the pseudo tag '@default'.
    Each tag found in self.dTagHandlers has its handler called with
    (sTag, aasSections, iTagLine, iEndLine).

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    therefore ignored, otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each leading empty line dropped moves the comment start one line down.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # Note: asLines cannot be empty here, the line holding the 'Opcode' or
    #       '@' that got us past the check above survives the stripping.
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;               # Number of tags that had a handler.
    sFlatDefault = None;            # Flattened untagged text, if any.
    sCurTag      = '@default';      # Pseudo tag for text before the first real tag.
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: append text to the current
            # section, an empty line opens a new section (once per gap).
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section first (left behind by an empty
            # line right before this tag).
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            # NOTE(review): '@opencoding' starts with '@op', so unless it has
            #               a handler it is caught by the 'Unknown tag' branch
            #               above and never gets this hint.
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The first word (lowercased) is the tag, the remainder of the
            # line (if any) becomes the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # Note: iLine is the index of the last comment line here, and the two
    #       'Did you mean' hints above are not repeated for the final tag.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4275
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the argument list is
    not closed within sInvocation, the following lines are appended from
    self.asLines, starting at index self.iLine, until it is.  self.iLine
    itself is not changed.

    Nested parentheses and quoted strings (single or double quotes, with
    backslash escapes) are honoured, so commas inside them do not split
    arguments.  Arguments are returned stripped of surrounding whitespace.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    A missing opening parenthesis or an invalid macro name is reported via
    self.raiseError().  Running out of input lines is reported via
    self.error() and whatever was parsed so far is returned.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # This used to be 'return self.error(...)', which handed the caller
        # something that is not the (offset, arguments) tuple it unpacks.
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;                   # Parenthesis nesting depth, 0 = invocation closed.
    off      = offOpen + 1;
    offStart = off;                 # Start of the argument currently being scanned.
    chQuote  = None;                # The quote character when inside a string, else None.
    while cDepth > 0:
        # Pull in the next source line when running out of text.
        if off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;           # Skip the escaped character.
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the outermost level delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
4326
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed by (optional whitespace and) an
    opening parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
    In the latter case the offset is adjusted to be relative to sCode.
    """
    offHit = sCode.find(sMacro);
    # Note: startswith() rather than indexing, so that a macro name with
    #       nothing (or only whitespace) after it is simply "not found"
    #       instead of raising IndexError on the empty string.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4336
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4342
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so later names are not tried after a hit.
    aoResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in aoResults if asRet is not None), None);
4352
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty):
    mnemonic, operands, encoding (via g_kdIemForms), statistics name and
    hints.  Inconsistencies are reported through self.error() and do not
    stop the processing.

    Parameters:
        sMacro      The macro name, only used in messages.
        sStats      The a_Stats argument (statistics name).
        sAsm        The a_szMnemonic argument, currently unused.
        sForm       The a_Form argument, looked up in g_kdIemForms.
        sUpper      The a_Upper argument (upper case mnemonic).
        sLower      The a_Lower argument (lower case mnemonic).
        sDisHints   The a_fDisHints argument, '|' separated DISOPTYPE_XXX or 0.
        sIemHints   The a_fIemHints argument, '|' separated IEMOPHINT_XXX or 0.
        asOperands  The operand type arguments (a_Op1, ...), keys of g_kdOpTypes.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # A VEX.vvvv style operand ('V') requires a VEX or XOP form.
                # This used to test 'sForm.find("VEX") > 0 or sForm.find("XOP")',
                # which misses a leading "VEX" and treats the -1 returned for
                # a missing "XOP" as true, so the requirement never kicked in
                # for forms without either.
                fVexOrXopForm = sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those described by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                        % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4492
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived here before handing over to workerIemOpMnemonicEx:
    the statistics name is the lower case mnemonic and the operands joined
    by underscores, the assembly string is the mnemonic, a space and the
    operands joined by commas.  Without operands both are just the
    mnemonic.

    Returns what workerIemOpMnemonicEx returns.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = '_'.join([sLower, ] + list(asOperands));
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4502
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function, makes it the current
    block (self.oCurMcBlock), appends it to g_aoMcBlocks and bumps the block
    counters.

    sCode is the code snippet the statement was found in and
    offBeginStatementInCodeStr the offset of the statement within it, used
    for working out the indentation.  offBeginStatementInLine is the offset
    recorded in the block.

    Complains via self.raiseError() if there is no current function or if
    a block is already open.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Preconditions: inside a function and not already inside a block.
    if not self.sCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub)line holding the
    # statement.  Expanded multiline macros have embedded newlines; rfind
    # returns -1 when there is none, which makes the line start offset zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    oBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                     self.sCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oBlock;
    g_aoMcBlocks.append(oBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4531
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines making up the current MC block, hands them to
    the block via complete() together with the current line number and
    offEndStatementInLine, and clears self.oCurMcBlock.

    Complains via self.raiseError() if there is no open block, or if the
    block spans several lines and IEM_MC_BEGIN is not the first word on its
    line.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # Complete and discard the current block.
    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # Regular case: the block covers lines iBeginLine thru iLine (1-based).
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        # Same-line case: the whole block lives inside one asLines entry.
        sRawLine = self.asLines[self.iLine - 1];

        # Cut after the newline terminating the IEM_MC_END (sub)line.
        off = sRawLine.find('\n', offEndStatementInLine);
        if off > 0:
            sRawLine = sRawLine[:off + 1];

        # Cut before the (sub)line holding IEM_MC_BEGIN; if something other
        # than IEM_MC_BEGIN comes first on it, start at the statement itself.
        off = sRawLine.rfind('\n', 0, self.oCurMcBlock.offBeginLine) + 1;
        sRawLine = sRawLine[off:];
        if not sRawLine.strip().startswith('IEM_MC_BEGIN'):
            sRawLine = sRawLine[self.oCurMcBlock.offBeginLine - off:]

        # NOTE(review): when sRawLine ends with a newline, split() yields a
        #               trailing empty string and thus a final '\n' entry.
        asLines = [sLine + '\n' for sLine in sRawLine.split('\n')];

    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
4573
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is a snippet of code (comments excluded by the caller) and offLine
    the offset of the snippet within its line; the latter is only used for
    the offsets passed on to the IEM_MC_BEGIN/END workers.

    The following is looked for, in this order:
        - FNIEMOP_XXX decoder function definitions and stubs (returns True),
        - FNIEMOP_DEF_1/2 worker function definitions (returns True),
        - IEMOP_HLP_DONE_VEX_DECODING_XXX,
        - the IEMOP_MNEMONIC family,
        - IEM_MC_BEGIN and IEM_MC_END (returns True if 'IEM_MC_' is present).

    Returns False if none of the returning cases above applied, which
    includes snippets without an opening parenthesis after the first
    character.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # The first macro argument is the function name.
            self.sCurFunction = asArgs[1];
            #self.debug('%s: sCurFunction=%s' % (self.iLine, self.sCurFunction,));

            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # A stub has no body to wait for, so the instructions are done.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.sCurFunction = asArgs[1];
            #self.debug('%s: sCurFunction=%s (%s)' % (self.iLine, self.sCurFunction, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The _L0 variants imply the vex_l_zero hint; complain if the
            # mnemonic macro (when already seen) did not give it, then set it.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # Note: asArgs[0] is the macro name, the macro arguments follow.  The
        #       hints are always the last two arguments, the operands come
        #       right before them.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            # Match offsets are relative to sCode, offLine makes them line relative.
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                else:
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
            return True;

    return False;
4714
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression has a single group with one alternative per
    macro in self.dMacros: function-like macros (asArgs is not None) match
    the name followed by optional whitespace and the opening parenthesis,
    simple macros match the name as a whole word.  self.oReMacros is set to
    None if there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    # Raw strings are used for the regex fragments; the previous '\s*\('
    # was an invalid escape sequence in an ordinary string literal.
    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + r'\s*\(');
        else:
            asAlternatives.append(sName + r'\b');
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
4735
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are pulled in from self.asLines (advancing
    self.iLine).  The definition is then split into name, parameter list
    and body.  Only macros whose body contains IEM_MC_BEGIN are recorded in
    self.dMacros, all others are ignored.

    Returns True if the macro was ignored, the result of
    workerPreProcessRecreateMacroRegex() if it was recorded, and the result
    of self.error() if the definition could not be parsed.
    """
    iFirstLine = self.iLine;

    #
    # Join up continuation lines, dropping the backslash and any blanks in
    # front of it while keeping the newline itself.
    #
    cLines = len(self.asLines);
    while self.iLine < cLines and sRest.endswith('\\\n'):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;

    #
    # Split out name, parameter list and body.  The function-like form is
    # tried first, falling back on the simple (parameterless) form.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        asParams = list(map(str.strip, oMatch.group(2).split(',')));
        sBody    = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asParams = None;
        sBody    = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # Only macros containing MC blocks are of interest.  MC blocks within
    # nested macro expansion are NOT supported, to avoid lots of extra work.
    #
    if sBody.find("IEM_MC_BEGIN") < 0:
        return True;

    #
    # Record the macro and refresh the regular expression matching them.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
4785
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    If the named macro is one of those recorded in self.dMacros it is
    removed and the result of workerPreProcessRecreateMacroRegex() is
    returned.  Otherwise nothing is done and True is returned.
    """
    # Quick comment strip: everything from the first slash on goes, provided
    # the slash is not the very first character.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Not one of ours?
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4804
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive.

    The line is checked against self.oReHashDefine and then against
    self.oReHashUndef.  On the first match the corresponding worker is
    called with the first regex group plus a newline, and its result is
    returned.

    Returns False if the line is neither a #define nor an #undef.
    """
    aoDirectives = (
        (self.oReHashDefine, self.workerPreProcessDefine),
        (self.oReHashUndef,  self.workerPreProcessUndef),
    );
    for oRegex, fnWorker in aoDirectives:
        oMatch = oRegex.match(sLine);
        if oMatch:
            return fnWorker(oMatch.group(1) + '\n');
    return False;
4817
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is the match object locating the invocation in sLine.  Its first
    group is the macro name, ending with an opening parenthesis if the macro
    is invoked with arguments.  The arguments are ASSUMED to all be on this
    line; self.raiseError() is called if they are not, or if their number
    differs from what the macro definition expects.

    Returns the line with the invocation replaced by what the macro's
    expandMacro() method returns.
    """
    #
    # Get our bearings.
    #
    offInvocation = oMatch.start();
    offAfterName  = oMatch.end();
    sName         = oMatch.group(1);
    assert sName == sLine[offInvocation:offAfterName];

    fTakesArgs = sName.endswith('(');
    if fTakesArgs:
        sName = sName[:-1].strip();
    oMacro  = self.dMacros[sName] # type: SimpleParser.Macro
    sBefore = sLine[:offInvocation];

    #
    # The simple case: no argument list, just substitute the name.
    #
    if not fTakesArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[offAfterName:];

    #
    # Collect the actual arguments.  Commas only separate arguments at the
    # outermost parenthesis level, and the scan stops on (and leaves 'off'
    # pointing at) the parenthesis closing the invocation.
    #
    asActual = [];
    cDepth   = 1;
    offArg   = offAfterName;
    off      = offAfterName;
    while True:
        if off >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[off];
        if ch == ',':
            if cDepth == 1:
                asActual.append(sLine[offArg:off].strip());
                offArg = off + 1;
        elif ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                asActual.append(sLine[offArg:off].strip());
                break;
        off += 1;

    # An empty argument list scans as one empty argument, which is only
    # turned into no arguments if the macro has an empty parameter list.
    cExpected = len(oMacro.asArgs);
    if cExpected == 0 and asActual == ['']:
        asActual = [];
    if cExpected != len(asActual):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asActual));
    return sBefore + oMacro.expandMacro(self, asActual) + sLine[off + 1 :];
4876
def parse(self):
    """
    Parses the given file.

    Processes self.asLines one line at a time, starting at self.iLine.
    While in the code state, known macros are expanded first and the
    expanded text is stored back into self.asLines.  Each line is then
    split into code and C-style comment parts: code is handed to
    checkCodeForMacro(), completed comments to parseComment().  Lines
    without any slash are checked for preprocessor directives, relevant
    macros, end of function and function tables.

    Returns number of errors (the result of printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Anything but a C++ line comment seen while in code goes thru the C-style comment scanner.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Resume after the closing '*/', otherwise its '/' followed by a '*'
                            # (like in '/*=*/*pb') is mistaken for the start of another comment.
                            offHit = sLine.find('/*', offEnd + 2);

                        if offHit >= 0:
                            # Code up to the comment start, then switch to collecting the comment.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # Comment complete: hand it over and continue scanning the rest as code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (    self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        # Note! startswith() rather than sLine[0] so that an empty line cannot raise IndexError.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
4975
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
## Set by the first __parseFileByName() call and passed to the parser of
## every file parsed after that.
g_oParsedCommonBodyMacros = None # type: SimpleParser

def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is located (in the
    directory of sSrcFile, otherwise in the directory of this script), parsed
    and cached in g_oParsedCommonBodyMacros, so the macros from it can be used
    when processing sSrcFile and all subsequent files.

    Returns a (cErrors, oParser) tuple, where cErrors is the return value of
    SimpleParser.parse() for sSrcFile.
    Raises Exception if a file cannot be opened or read.  ParserException is
    printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # Closes the file whether reading succeeds or not.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only supposed to supply macros, so any instruction,
        # tag, stub or MC block found in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The arguments follow the order of the message text: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5040
5041
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each name in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.  The tests of every instruction found are
    appended to the aoTests of the instruction holding the reference.  A
    reference that cannot be resolved, or that resolves to the instruction
    itself, is reported as an error on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing to copy from the destination itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5069
5070
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, the aoTests of every other instruction in
    g_aoAllInstructions is replaced by an empty list.  Nothing is changed when
    the list is empty.

    Returns 0 (always).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5082
## List of all main instruction files and their default maps.
## Each entry is a ( file name, default map name, ) pair.  parseFiles()
## matches the base name of its input files against the first element
## (ignoring case) and parseAll() hands the whole list to the parse worker.
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h',    'one',       ),
    ( 'IEMAllInstructionsOneByte.cpp.h',   'one',       ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f',     ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h',   'vexmap1',   ),
    ( 'IEMAllInstructionsVexMap2.cpp.h',   'vexmap2',   ),
    ( 'IEMAllInstructionsVexMap3.cpp.h',   'vexmap3',   ),
    ( 'IEMAllInstructions3DNow.cpp.h',     '3dnow',     ),
);
5095
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (sFilename, sDefaultMap) pairs.  A
    filename without a directory part that does not exist as given is looked
    up in the directory of this script instead.

    Once all files are parsed, the test copying and only-test steps are run
    and the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! max() avoids dividing by zero when no instructions were found.
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5126
5127
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is found by comparing its base name, ignoring
    case, with the entries in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure, which includes files not found in the table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.basename(sFilename).lower();

        # First table entry with a matching name wins.
        sMap = next((asCur[1] for asCur in g_aasAllInstrFilesAndDefaultMap if asCur[0].lower() == sBaseName), None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap);
5149
5150
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap, i.e. all the
    IEMAllInstruction*.cpp.h files.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
5159
5160
5161#
5162# Generators (may perhaps move later).
5163#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP() macro invocation, or OPVEX() when the instruction has
    more than three operands, with the following columns:
        - the format string (mnemonic followed by the operand formats),
        - the parser indexes, one per operand slot,
        - the opcode enum value (oInstr.sDisEnum),
        - the OP_PARM_XXX parameters, one per operand slot,
        - the DISOPTYPE_XXX flags from the instruction hints (0 if none).

    Example output and the macro consuming it:
        OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
           DISOPTYPE_HARMLESS),
        define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \\
           { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

    Returns the formatted line.
    """
    aoOperands = oInstr.aoOperands;
    if len(aoOperands) > 3:
        sMacro, cMaxOperands = ('OPVEX', 4);
    else:
        sMacro, cMaxOperands = ('OP', 3);
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    asOperandFmts = [];
    for oOperand in aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':               ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();  ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.
    #
    dSimpleEncodings = {
        'ModR/M':     'IDX_ParseModRM,',
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    sEncoding = oInstr.sEncoding;
    asParsers = [];
    if sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);

    if sEncoding is None or sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif sEncoding in dSimpleEncodings:
        asParsers.append(dSimpleEncodings[sEncoding]);
    elif sEncoding in [ 'prefix', ]:
        asParsers.extend(['0,'] * len(aoOperands));
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #        IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
        #        IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #        IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
        #        IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad the unused slots.
    asParsers.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[len(asParsers):]]);
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    sFlags = (' | '.join(asFlags) or '0') + '),';

    #
    # Format the columns into a line.
    #
    asColumns   = [ sFormat, ] + asParsers + [ oInstr.sDisEnum + ',', ] + asParams + [ sFlags, ];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        # Pad up to the column offset, or use a single space if we are already past it.
        sLine = sLine.ljust(offColumn) if len(sLine) < offColumn else sLine + ' ';
        sLine += sColumn;
    return sLine;
5288
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the unused (None) entries at both ends of the table are
    worth trimming off.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    _ = oMap; # Use this for overriding.

    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off at the end and at the start.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Output the full table unless at least a 30th of the entries can be dropped.
    # NOTE(review): the comment here used to say "more than 30%", the code has
    #               always compared with len // 30 - confirm which was intended.
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed < cEntries // 30:
        return (0, cEntries, False);

    # Go for the short table version.
    return (iFirst, iEnd, True);
5310
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files via parseAll() and then writes, for each
    instruction map with a name starting with 'vex', a DISOPCODE table, an
    AssertCompile() on the table size and a DISOPMAPDESC range record to
    oDstFile.

    Returns exit code: 0 on success, 1 if parseAll() raised an exception (the
    error and the traceback are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps using the 'byte+pfx' selector are split up into four maps, one for
    # each prefix variant (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for _, oMap in sorted(iter(g_dInstructionMaps.items()),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations, but nothing in this
    #       function writes them out.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # An empty range gets a dummy entry further down, so the table never has less than one element.
        cTableEntries = max(iInstrEnd - iInstrStart, 1);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), cTableEntries,));
            asLines.append(             'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), cTableEntries,));
        asLines.append('{');

        if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0:
                # Start of a new row of 16 opcode bytes.
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the entries covered by the block (the last one is skipped by the
                        # increment at the end of the loop).  This used to be a hardcoded 3, which
                        # only matches four entries per byte.
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): An empty list is passed on as if it was an instruction;
                    #               presumably this never happens - confirm.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The extern declaration uses the exact same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
5439
# When run as a script: generate the disassembler tables (the destination
# defaults to stdout) and use the returned value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5442
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette