VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 98913

Last change on this file since 98913 was 98910, checked in by vboxsync, 23 months ago

VMM/IEM: More work on processing MC blocks. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 247.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 98910 2023-03-11 01:59:59Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 98910 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: the 'long' builtin does not exist there, so alias it to int.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS, keyed by the X86_EFL_XXX name.
## Single-bit flags are written as shifts, mirroring RT_BIT_32(n).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a mnemonic to an X86_EFL_XXX constant name; a leading '!' marks the
## mnemonic that denotes the flag being clear.
##
## NOTE(review): 'po'/'pe' are mapped opposite to the usual debugger meaning
##               (where 'pe' denotes PF set).  Existing specs may depend on
##               the mapping as it stands - verify before changing it.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0, keyed by the X86_CR0_XXX name.
## Each entry is a single bit, written as a shift to mirror RT_BIT_32(n).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
152
## Constants and values for CR4, keyed by the X86_CR4_XXX name.
## Each entry is a single bit, written as a shift to mirror RT_BIT_32(n).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
174
## XSAVE components (XCR0), keyed by the XSAVE_C_XXX name.
## Single components are written as bit shifts; the two ALL_AVX masks are
## kept as literal values.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations.
## Every location maps to its own (initially empty) list.
g_kdOpLocations = {
    # Encoded locations.
    'reg':    [], ##< modrm.reg
    'rm':     [], ##< modrm.rm
    'imm':    [], ##< immediate instruction data
    'vvvv':   [], ##< VEX.vvvv

    # Fixed registers.
    'AL':     [],
    'rAX':    [],
    'rDX':    [],
    'rSI':    [],
    'rDI':    [],
    'rFLAGS': [],
    'CS':     [],
    'DS':     [],
    'ES':     [],
    'FS':     [],
    'GS':     [],
    'SS':     [],
};
214
## \@op[1-4] types.
##
## Each value is a 5-tuple:
##     - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##     - 1: the location (a g_kdOpLocations key).
##     - 2: disassembler format string version of the type.
##     - 3: disassembler OP_PARAM_XXX (XXX only).
##     - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    # rFLAGS
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Each value is a 3-tuple: ( sEncoding, [ sWhere1, ... ], opcodesub ).
## The 'FIXED' form has None in place of the location list.
##
## NOTE(review): 'M_REG' and 'M_MEM' (and their VEX variants) carry an empty
##               opcodesub, unlike the other _REG/_MEM forms - confirm that
##               this is intentional.
g_kdIemForms = {
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],            '',             ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],            '11 mr/reg',    ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],            '!11 mr/reg',   ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '',             ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '11 mr/reg',    ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '!11 mr/reg',   ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],            '',             ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],            '11 mr/reg',    ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],            '!11 mr/reg',   ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '',             ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '11 mr/reg',    ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '!11 mr/reg',   ),
    'M':            ( 'ModR/M',     [ 'rm', ],                  '',             ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                  '',             ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                  '',             ),
    'R':            ( 'ModR/M',     [ 'reg', ],                 '',             ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '',             ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '11 mr/reg',    ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '!11 mr/reg',   ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '',             ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '11 mr/reg',    ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '!11 mr/reg',   ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                  ''              ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                  ''              ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                  ''              ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                 ''              ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '',             ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '11 mr/reg',    ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '!11 mr/reg',   ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '',             ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '11 mr/reg',    ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '!11 mr/reg',   ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '',             ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '11 mr/reg',    ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '!11 mr/reg',   ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '',             ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '11 mr/reg',    ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '!11 mr/reg',   ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '',             ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '11 mr/reg',    ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '!11 mr/reg',   ),

    'FIXED':        ( 'fixed',      None,                       '',             ),
};
417
## \@oppfx values.
## Each prefix maps to its own (initially empty) list.
g_kdPrefixes = { sPrefix: [] for sPrefix in ( 'none', '0x66', '0xf3', '0xf2', ) };
425
## Special \@opcode tag values.
## Each value maps to its own (initially empty) list.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
435
## Special \@opcodesub tag values.
##
## Each value is a two element list:
##     - 0: the real (canonical) value; differs from the key only for aliases.
##     - 1: the value for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    # Not an alias, so the real value must be the key itself (it used to
    # resolve to 'vex.l=0', silently turning L=1 into L=0).
    'vex.l=1':              [ 'vex.l=1',            'L1',       ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
};
456
## Valid values for \@openc.
## Each value is a one element list holding the BS3CG1ENC_XXX name, or None
## when there is none (prefixes).
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Each style maps to its own (initially empty) list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
};
475
## Known CPU names; every name maps to an empty tuple.
g_kdCpuNames = dict.fromkeys(( '8086', '80186', '80286', '80386', '80486', ), ());
483
## \@opcpuid
## Maps the \@opcpuid feature name to the X86_CPUID_XXX feature constant name.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three previously named the neighbouring ECX feature bit
    # (DTES64, CPLDS and TM2 respectively) rather than the feature itself.
    # NOTE(review): confirm these constant names against the x86 header.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD features.
    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name, or to an empty string
## for the hints that have none.
g_kdHints = {
    'invalid':                  'DISOPTYPE_INVALID',
    'harmless':                 'DISOPTYPE_HARMLESS',
    'controlflow':              'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous':    'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':                'DISOPTYPE_DANGEROUS',
    'portio':                   'DISOPTYPE_PORTIO',
    'privileged':               'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':        'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':       'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':     'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':         'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':                'DISOPTYPE_INTERRUPT',
    'illegal':                  'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':            'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':         'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':             'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':              'DISOPTYPE_PORTIO_READ',
    'portio_write':             'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':               'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':                  'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':       'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':        'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':       'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':             'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86':    'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                   ##  (only in 16 & 32 bits mode!)
    'avx':                      'DISOPTYPE_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'sse':                      'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'mmx':                      'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                      'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':           '',                                ##< Ignores operand size prefix 66h.
    'ignores_rexw':             '',                                ##< Ignores REX.W.
    'ignores_op_sizes':         '',                                ##< Shorthand, presumably for "ignores_oz_pfx | ignores_rexw" (TODO confirm).
    'vex_l_zero':               '',                                ##< VEX.L must be 0.
    'vex_l_ignored':            '',                                ##< VEX.L is ignored.
    'vex_v_zero':               '',                                ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':             '',                                ##< Lock prefix allowed.
};
570
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Each type maps to its own (initially empty) list.
## The 'LZ' suffix means that VEX.L must be zero.
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (
        'none',
        '1', '2', '3', '4', '4UA', '5', '5LZ', '6', '7', '7LZ', '8', '11', '12',
        'E1', 'E1NF', 'E2', 'E3', 'E3NF', 'E4', 'E4NF', 'E5', 'E5NF', 'E6', 'E6NF',
        'E7NF', 'E9', 'E9NF', 'E10', 'E11', 'E12', 'E12NF',
    )
};
606
607
608def _isValidOpcodeByte(sOpcode):
609 """
610 Checks if sOpcode is a valid lower case opcode byte.
611 Returns true/false.
612 """
613 if len(sOpcode) == 4:
614 if sOpcode[:2] == '0x':
615 if sOpcode[2] in '0123456789abcdef':
616 if sOpcode[3] in '0123456789abcdef':
617 return True;
618 return False;
619
620
621class InstructionMap(object):
622 """
623 Instruction map.
624
625 The opcode map provides the lead opcode bytes (empty for the one byte
626 opcode map). An instruction can be member of multiple opcode maps as long
627 as it uses the same opcode value within the map (because of VEX).
628 """
629
630 kdEncodings = {
631 'legacy': [],
632 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
633 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
634 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
635 'xop8': [], ##< XOP prefix with vvvvv = 8
636 'xop9': [], ##< XOP prefix with vvvvv = 9
637 'xop10': [], ##< XOP prefix with vvvvv = 10
638 };
639 ## Selectors.
640 ## 1. The first value is the number of table entries required by a
641 ## decoder or disassembler for this type of selector.
642 ## 2. The second value is how many entries per opcode byte if applicable.
643 kdSelectors = {
644 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
645 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
646 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
647 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
648 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
649 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
650 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
651 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
652 };
653
654 ## Define the subentry number according to the Instruction::sPrefix
655 ## value for 'byte+pfx' selected tables.
656 kiPrefixOrder = {
657 'none': 0,
658 '0x66': 1,
659 '0xf3': 2,
660 '0xf2': 3,
661 };
662
663 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
664 sEncoding = 'legacy', sDisParse = None):
665 assert sSelector in self.kdSelectors;
666 assert sEncoding in self.kdEncodings;
667 if asLeadOpcodes is None:
668 asLeadOpcodes = [];
669 else:
670 for sOpcode in asLeadOpcodes:
671 assert _isValidOpcodeByte(sOpcode);
672 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
673
674 self.sName = sName;
675 self.sIemName = sIemName;
676 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
677 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
678 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
679 self.aoInstructions = [] # type: Instruction
680 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
681
682 def copy(self, sNewName, sPrefixFilter = None):
683 """
684 Copies the table with filtering instruction by sPrefix if not None.
685 """
686 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
687 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
688 else self.sSelector,
689 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
690 if sPrefixFilter is None:
691 oCopy.aoInstructions = list(self.aoInstructions);
692 else:
693 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
694 return oCopy;
695
696 def getTableSize(self):
697 """
698 Number of table entries. This corresponds directly to the selector.
699 """
700 return self.kdSelectors[self.sSelector][0];
701
702 def getEntriesPerByte(self):
703 """
704 Number of table entries per opcode bytes.
705
706 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
707 the others it will just return 1.
708 """
709 return self.kdSelectors[self.sSelector][1];
710
711 def getInstructionIndex(self, oInstr):
712 """
713 Returns the table index for the instruction.
714 """
715 bOpcode = oInstr.getOpcodeByte();
716
717 # The byte selectors are simple. We need a full opcode byte and need just return it.
718 if self.sSelector == 'byte':
719 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
720 return bOpcode;
721
722 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
723 if self.sSelector == 'byte+pfx':
724 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
725 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
726 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
727
728 # The other selectors needs masking and shifting.
729 if self.sSelector == '/r':
730 return (bOpcode >> 3) & 0x7;
731
732 if self.sSelector == 'mod /r':
733 return (bOpcode >> 3) & 0x1f;
734
735 if self.sSelector == 'memreg /r':
736 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
737
738 if self.sSelector == '!11 /r':
739 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
740 return (bOpcode >> 3) & 0x7;
741
742 if self.sSelector == '11 /r':
743 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
744 return (bOpcode >> 3) & 0x7;
745
746 if self.sSelector == '11':
747 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
748 return bOpcode & 0x3f;
749
750 assert False, self.sSelector;
751 return -1;
752
753 def getInstructionsInTableOrder(self):
754 """
755 Get instructions in table order.
756
757 Returns array of instructions. Normally there is exactly one
758 instruction per entry. However the entry could also be None if
759 not instruction was specified for that opcode value. Or there
760 could be a list of instructions to deal with special encodings
761 where for instance prefix (e.g. REX.W) encodes a different
762 instruction or different CPUs have different instructions or
763 prefixes in the same place.
764 """
765 # Start with empty table.
766 cTable = self.getTableSize();
767 aoTable = [None] * cTable;
768
769 # Insert the instructions.
770 for oInstr in self.aoInstructions:
771 if oInstr.sOpcode:
772 idxOpcode = self.getInstructionIndex(oInstr);
773 assert idxOpcode < cTable, str(idxOpcode);
774
775 oExisting = aoTable[idxOpcode];
776 if oExisting is None:
777 aoTable[idxOpcode] = oInstr;
778 elif not isinstance(oExisting, list):
779 aoTable[idxOpcode] = list([oExisting, oInstr]);
780 else:
781 oExisting.append(oInstr);
782
783 return aoTable;
784
785
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per underscore separated
    word of the map name:
      - 'm' / 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and two digit
        lowercase hex words are kept as-is behind an underscore.
      - Any other word gets 'grp' and 'map' turned into 'Grp' and 'Map',
        has its first character uppercased and is appended directly.
    """
    asPieces = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        fKeepVerbatim = sWord in ('m', 'r') \
                     or (len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord));
        if fKeepVerbatim:
            asPieces.append('_' + sWord);
        else:
            sCamel = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sCamel[0].upper() + sCamel[1:]);
    return ''.join(asPieces);
803
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName with 'g_aDisas' replaced
    by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
809
def isVexMap(self):
    """ Returns True if the map encoding name starts with 'vex', i.e. it is a VEX map. """
    fIsVex = self.sEncoding.startswith('vex');
    return fIsVex;
813
814
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned
    constructor parameter gives the default stance on zero vs sign
    extending.  A value can override fUnsigned=True by starting with a
    '+' or '-' sign, which always requests sign extension.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        # The normal value sizes in bytes, smallest first.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception, the message is also available as sMessage. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its bitwise inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second is what to OR with it.  This base
        implementation always returns the single entry form, with the bytes
        in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts where the '0x' prefix is expected.
        fSignExtend = not self.fUnsigned;
        offPrefix   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offPrefix   = 1;
        fHex = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits.  A negative value n is produced as the digit
        # wise inverse of (-n - 1), which is its two's complement form.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set so the value stays negative.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest normal size which fits.  Values exceeding the
        # largest size are rounded up to a multiple of the largest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        assert isinstance(cNaturalDigits, int)

        # Pad with zeros or, for negative values, with 'f' digits.
        sHex = sHex.rjust(cNaturalDigits, '0' if iValue >= 0 else 'f');

        # Convert to a bytearray, least significant byte first.
        abValue = bytearray([int(sHex[offHex : offHex + 2], 16) for offHex in range(len(sHex) - 2, -1, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
939
940
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of flag mnemonics which are looked
    up in g_kdEFlagsMnemonics.  Mnemonics mapping to a constant prefixed by
    '!' request the flag to be cleared, the others request it to be set.
    """

    ## The mnemonics which stand for a cleared flag.
    ## NOTE(review): expected to match the '!' entries of g_kdEFlagsMnemonics, which
    ## is defined elsewhere in this file; get() asserts that the two agree.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into byte representation(s), see TestType.get.

        Returns a single entry tuple with the mask of flags to set when no
        flag is to be cleared.  Otherwise returns a pair where the first
        entry is the mask of the flags to clear and the second entry is the
        mask of the flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether it names any flag to clear.

        The value is split on commas exactly like get() does and whole
        mnemonics are compared.  A substring search would misfire on any
        mnemonic which merely contains one of the two letter zero-value
        mnemonics, and thereby trip the consistency asserts in get().
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
976
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a dictionary.

    The value is a comma separated list of flag names.  Each name is
    uppercased, prefixed by sConstantPrefix and looked up in
    kdConstantsAndValues; the values found are ORed together.
    """

    ## NOTE(review): not referenced by this class, looks like a leftover from TestTypeEflags.
    kdZeroValueFlags = dict.fromkeys([ 'nv', 'pl', 'nz', 'na', 'pe', 'nc', 'di', 'up', ], 0);

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts the flag list into a byte representation, see TestType.get.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
997
998
def _testInOutFields():
    """
    Builds the TestInOut.kdFields dictionary.

    Each entry maps a CPU context field name to the tuple
    ( default type, [both|input|output], ).  The register families are
    generated rather than spelled out entry by entry, so that all members
    of a family are guaranteed to follow the same naming pattern.
    """
    dFields = {};

    def addFields(asNames, sType = 'uint', sDirection = 'both'):
        """ Adds the given field names with the given default type and direction. """
        for sName in asNames:
            assert sName not in dFields, sName;
            dFields[sName] = ( sType, sDirection, );

    asHighGprs = [ 'r%u' % (iReg,) for iReg in range(8, 16) ];  # r8 thru r15

    # Operands (\@op1 thru \@op4).
    addFields([ 'op%u' % (iOperand,) for iOperand in range(1, 5) ]);
    # Flags.
    addFields([ 'efl', ], sType = 'efl');
    addFields([ 'efl_undef', ], sDirection = 'output');
    # 8-bit GPRs.
    addFields([ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh', ] + [ sReg + 'l' for sReg in asHighGprs ]);
    # 16-bit GPRs.
    addFields([ 'ax', 'dx', 'cx', 'bx', 'sp', 'bp', 'si', 'di', ] + [ sReg + 'w' for sReg in asHighGprs ]);
    # 32-bit GPRs.
    addFields([ 'eax', 'edx', 'ecx', 'ebx', 'esp', 'ebp', 'esi', 'edi', ] + [ sReg + 'd' for sReg in asHighGprs ]);
    # 64-bit GPRs.
    asGprs64 = [ 'rax', 'rdx', 'rcx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', ] + asHighGprs;
    addFields(asGprs64);
    # 16-bit, 32-bit or 64-bit registers according to operand size.
    addFields([ 'oz.' + sReg for sReg in asGprs64 ]);
    # Control registers, each with a type of its own.
    for sCtrlReg in ( 'cr0', 'cr4', 'xcr0', ):
        addFields([ sCtrlReg, ], sType = sCtrlReg);
    # FPU Registers
    addFields([ 'fcw', 'fsw', 'ftw', 'fop', 'fpuip', 'fpucs', 'fpudp', 'fpuds', 'mxcsr', ]);
    addFields([ 'st%u' % (iReg,) for iReg in range(8) ]);
    # MMX registers.
    addFields([ 'mm%u' % (iReg,) for iReg in range(8) ]);
    # SSE registers: the whole register first, then one family per sub-field suffix.
    for sSuffix in ( '', '.lo', '.hi', '.lo.zx', '.dw0', ):
        addFields([ 'xmm%u%s' % (iReg, sSuffix,) for iReg in range(16) ]);
    # AVX registers.
    addFields([ 'ymm%u' % (iReg,) for iReg in range(16) ]);
    # Special ones.
    addFields([ 'value.xcpt', ], sDirection = 'output');

    return dFields;


class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created without fUnsigned = False, so it gets the
    ## TestType default (fUnsigned = True) and behaves exactly like 'uint'.
    ## Left unchanged here; confirm whether signed was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    ## Maps the field name to ( default type, [both|input|output], ).
    ## The last xmm dword 0 entry is named 'xmm15.dw0' like its siblings; it was
    ## previously misspelled 'xmm15_dw0'.
    kdFields = _testInOutFields();

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier applying sValue (of type sType) to sField using
        the operator sOp.  The field and operator must be known ones and
        all four arguments must be strings.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1264
1265
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable against one of its valid values using
    one of the compare operators.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    ## Each valid value maps to the name associated with the variable/value combination.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates the selector 'sVariable sOp sValue'.  The variable, the
        operator and the value (for that variable) must all be known ones.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1362
1363
1364class InstructionTest(object):
1365 """
1366 Instruction test.
1367 """
1368
1369 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1370 self.oInstr = oInstr # type: InstructionTest
1371 self.aoInputs = [] # type: list(TestInOut)
1372 self.aoOutputs = [] # type: list(TestInOut)
1373 self.aoSelectors = [] # type: list(TestSelector)
1374
1375 def toString(self, fRepr = False):
1376 """
1377 Converts it to string representation.
1378 """
1379 asWords = [];
1380 if self.aoSelectors:
1381 for oSelector in self.aoSelectors:
1382 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1383 asWords.append('/');
1384
1385 for oModifier in self.aoInputs:
1386 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1387
1388 asWords.append('->');
1389
1390 for oModifier in self.aoOutputs:
1391 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1392
1393 if fRepr:
1394 return '<' + ' '.join(asWords) + '>';
1395 return ' '.join(asWords);
1396
1397 def __str__(self):
1398 """ Provide string represenation. """
1399 return self.toString(False);
1400
1401 def __repr__(self):
1402 """ Provide unambigious string representation. """
1403 return self.toString(True);
1404
class Operand(object):
    """
    Instruction operand.

    Pairs the operand location (a g_kdOpLocations key) with the operand
    type (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with E, G or M. """
        chKind = self.sType[0];
        return chKind in ['E', 'G', 'M'];
1419
1420
1421
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core attributes
    (mnemonic, operands, maps, opcode, flags, ...), implementation details,
    where it was decoded from, and intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of name=value pairs, leaving out
        the fields which are None.  It is wrapped in angle brackets when
        fRepr is True.
        """
        # Collect (name, value) pairs in output order; None values are dropped at the end.
        aoFields = [ ('opcode', self.sOpcode), ];
        if self.sPrefix:
            aoFields.append(('prefix', self.sPrefix));
        aoFields.append(('mnemonic', self.sMnemonic));
        aoFields.extend([ ('op%u' % (iOperand,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                          for iOperand, oOperand in enumerate(self.aoOperands, 1) ]);
        if self.aoMaps:
            aoFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        aoFields.append(('encoding', self.sEncoding));
        if self.dHints:
            aoFields.append(('hints', ','.join(self.dHints.keys())));
        aoFields.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            aoFields.append(('cpuid', ','.join(self.asCpuIds)));
        aoFields.append(('group', self.sGroup));
        if self.fUnused:
            aoFields.append(('unused', 'True'));
        if self.fInvalid:
            aoFields.append(('invalid', 'True'));
        aoFields += [
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ];
        if self.fStub:
            aoFields.append(('fStub', 'True'));
        if self.fUdStub:
            aoFields.append(('fUdStub', 'True'));
        if self.cOpTags:
            aoFields.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            aoFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            aoFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sRet = '; '.join([ '%s=%s' % (sField, oValue,) for sField, oValue in aoFields if oValue is not None ]);
        return '<' + sRet + '>' if fRepr else sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as its parent.  The list and dictionary
        attributes named below are copied shallowly, everything else
        (including the five flag lists) is shared by reference.  The oMap,
        sOpcode, sSubOpcode and sPrefix arguments override the respective
        attribute of the copy when given.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # Attributes taken over as-is (by reference).
        for sAttr in ( 'sMnemonic', 'sBrief', 'sEncoding',
                       'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                       'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                       'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                       'sStats', 'sFunction', 'fStub', 'fUdStub',
                       'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                       'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes', ):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists getting a shallow copy (aoOperands and aoTests might want a deeper one).
        for sAttr in ( 'asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                       'asRawDisParams', 'asCopyTests', ):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        # Attributes the caller may override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms are the full hex byte ('0x..'), '/r', '11/r' and
        '!11/r', where r is a single digit.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The ModR/M forms: the digit goes into bits 3 thru 5, the form decides bits 6 and 7.
        # Note! The !11/r form returns mod=1.
        ## @todo the !11/r form doesn't really work...
        for sLead, fModBits in ( ('/', 0x00), ('11/', 0xc0), ('!11/', 0x80), ):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1].isdigit():
                return (int(sOpcode[-1]) << 3) | fModBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        uMask = 0;
        for sFlag in (asFlags or ()):
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1660
1661
1662
## All the instructions.
## Starts out empty; nothing in this part of the file fills it.
g_aoAllInstructions = [] # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## NOTE(review): the type comment suggests one instruction per statistics name - confirm against the code filling it.
g_dAllInstructionsByStat = {} # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## NOTE(review): the type comment suggests a list of instructions per function name - confirm against the code filling it.
g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))

## Instructions tagged by oponlytest.
## Starts out empty as well.
g_aoOnlyTestInstructions = [] # type: list(Instruction)
1674
1675## Instruction maps.
1676g_aoInstructionMaps = [
1677 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1678 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1679 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1680 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1681 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1682 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1683 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1684 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1685 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1686 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1687 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1688 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1689 ## @todo g_apfnEscF1_E0toFF
1690 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1691 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1692 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1693 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1694 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1695 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1696 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1698
1699 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1700 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1701 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1702 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1703 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1704 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1705 ## @todo What about g_apfnGroup9MemReg?
1706 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1707 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1708 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1709 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1710 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1711 ## @todo What about g_apfnGroup15RegReg?
1712 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1713 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1714 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1715
1716 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1717 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1718
1719 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1720 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1721 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1722 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1724 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1725
1726 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1727 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1728
1729 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1730 InstructionMap('xopmap8', sEncoding = 'xop8'),
1731 InstructionMap('xopmap9', sEncoding = 'xop9'),
1732 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1733 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1734 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1735 InstructionMap('xopmap10', sEncoding = 'xop10'),
1736 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737];
# Lookup tables over g_aoInstructionMaps: the first is keyed by the map name
# (sName), the second by the IEM name (sIemName) of each map.
g_dInstructionMaps          = { oCurMap.sName:    oCurMap for oCurMap in g_aoInstructionMaps };
g_dInstructionMapsByIemName = { oCurMap.sIemName: oCurMap for oCurMap in g_aoInstructionMaps };
1740
1741
1742#
1743# "Microcode" statements and blocks
1744#
1745
class McStmt(object):
    """
    A single statement inside a microcode block.

    Attributes:
        sName:      The statement name, 'IEM_MC_XXX' or a name starting with 'C++'.
        asParams:   List of parameter strings.
        oUser:      Initialized to None by the constructor.
    """

    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as 'NAME(param, ...);' text, prefixed by
        cchIndent spaces and terminated by a newline.
        """
        sParamList = ', '.join(self.asParams);
        return ''.join((' ' * cchIndent, self.sName, '(', sParamList, ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderCode() output of all statements in
        aoStmts, each rendered with the same cchIndent.
        """
        asRendered = [];
        for oCurStmt in aoStmts:
            asRendered.append(oCurStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++'. """
        return self.sName[:len('C++')] == 'C++';
1771
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Besides the McStmt attributes it carries two statement lists:
    aoIfBranch and aoElseBranch, both empty after construction.
    """

    def __init__(self, sName, asParams):
        super(McStmtCond, self).__init__(sName, asParams);
        self.aoIfBranch   = [];
        self.aoElseBranch = [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more columns.
        The IEM_MC_ELSE() part is only emitted when the else branch is not empty.
        """
        sPad    = ' ' * cchIndent;
        asParts = [
            sPad + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPad + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPad + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1789
class McStmtVar(McStmt):
    """
    IEM_MC_LOCAL_VAR*

    Adds the variable type (sType), the variable name (sVarName) and an
    optional constant value (sConstValue) to the plain statement.
    """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        self.sType, self.sVarName = sType, sVarName;
        self.sConstValue          = sConstValue;    ##< None if not const.
1797
class McStmtArg(McStmtVar):
    """
    IEM_MC_ARG*

    Extends McStmtVar with the argument number (iArg) and an optional
    reference (sRef) whose kind is given by sRefType.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # Only these two reference kinds are known.
        assert sRefType in ('none', 'local');
        super(McStmtArg, self).__init__(sName, asParams, sType, sVarName, sConstValue);
        self.iArg     = iArg;
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
1806
1807
class McStmtCall(McStmt):
    """
    IEM_MC_CALL_*

    Records where in asParams the called function sits (idxFn), where the
    parameters following it start (idxParams), the function parameter itself
    (sFn) and, when iRcNameParam is not negative, the parameter found at
    that index (iRcName, otherwise None).
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        super(McStmtCall, self).__init__(sName, asParams);
        self.idxFn     = iFnParam;
        self.idxParams = iFnParam + 1;
        self.sFn       = asParams[iFnParam];
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1816
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only parameter (asParams[0]); fDecode
    selects which annotation renderCode() appends.
    """

    def __init__(self, sCode, fDecode, sName = 'C++'):
        super(McCppGeneric, self).__init__(sName, [sCode,]);
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code indented by cchIndent spaces, putting a
        '// C++ decode' or '// C++ normal' annotation in front of every
        newline of the result (including the terminating one).
        """
        sAnnotatedEol = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sLine         = ' ' * cchIndent + self.asParams[0] + '\n';
        return sLine.replace('\n', sAnnotatedEol);
1832
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as asParams[0] and the statement name
    is always 'C++/if'.
    """

    def __init__(self, sCode, fDecode):
        super(McCppCond, self).__init__('C++/if', [sCode,]);
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on their own lines and the
        branches indented by four more columns.  The else part is only
        produced when the else branch is not empty.
        """
        sPad = ' ' * cchIndent;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sPad + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sPad + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sPad + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sPad + 'else ' + sAnnotation + '\n',
                sPad + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sPad + '}\n',
            ]);
        return ''.join(asParts);
1853
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    Stored like a generic C++ statement named 'C++/preproc' with fDecode
    set to False.
    """

    def __init__(self, sCode):
        super(McCppPreProc, self).__init__(sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive without any indentation (cchIndent is not used). """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1863
1864
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    Keeps track of where the block lives in its source file, the raw lines
    it is made up of (set by complete()) and the decoded statement list
    (set by decode()).
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, sFunction, iInFunction, cchIndent = None):
        self.sSrcFile     = sSrcFile;       ##< The source file containing the block.
        self.iBeginLine   = iBeginLine;     ##< The line with the IEM_MC_BEGIN statement.
        self.offBeginLine = offBeginLine;   ##< The offset of the IEM_MC_BEGIN statement within the line.
        self.iEndLine     = -1;             ##< The line with the IEM_MC_END statement.
        self.offEndLine   = 0;              ##< The offset of the IEM_MC_END statement within the line.
        self.sFunction    = sFunction;      ##< The function the block resides in.
        self.iInFunction  = iInFunction;    ##< The block number within the function.
        ## The indentation; offBeginLine is used when cchIndent is None (or zero).
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        self.asLines      = []              # type: list(str) ##< The raw lines the block is made up of.
        ## Decoded statements in the block.
        self.aoStmts      = []              # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block.

        Records the end position and the raw lines.  May only be called once
        per block (asserted via iEndLine still being -1).
        """
        assert self.iEndLine == -1;
        self.iEndLine     = iEndLine;
        self.offEndLine   = offEndLine;
        self.asLines      = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error for offset off in sRawCode.

        The reported line is self.iBeginLine plus the number of newlines
        preceding off, the reported column is one-based.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (reported against the first line of the block). """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising an error if it doesn't match.
        Returns True when the count is as expected.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parsers.  These are static methods taking the block as the
    # first argument (oSelf) so they can be put in a name -> parser dictionary.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS

        Returns a tuple of two statements rather than a single one.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
        return (
            McStmtVar('IEM_MC_LOCAL_VAR', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[2], asParams[1]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def _parseMcCallFnFirst(oSelf, sName, asParams):
        """
        Common worker for the IEM_MC_CALL_XXX_<N> statements that have the
        function as the first parameter followed by <N> arguments, <N> being
        the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AIMPL_3|4

        Here the first parameter is the return code variable and the second
        one the function.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_VOID_AIMPL_0|1|2|3|4 """
        return McBlock._parseMcCallFnFirst(oSelf, sName, asParams);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AVX_AIMPL_2|3 """
        return McBlock._parseMcCallFnFirst(oSelf, sName, asParams);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
        return McBlock._parseMcCallFnFirst(oSelf, sName, asParams);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_MMX_AIMPL_2|3 """
        return McBlock._parseMcCallFnFirst(oSelf, sName, asParams);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_SSE_AIMPL_2|3 """
        return McBlock._parseMcCallFnFirst(oSelf, sName, asParams);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
        return McBlock._parseMcCallFnFirst(oSelf, sName, asParams);

    #
    # Low level text helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with comments removed.

        A C++ comment is removed up to, but not including, the newline.  A C
        comment is replaced by a single space unless there is already
        whitespace on either side of it.  An unterminated comment truncates
        the result at the comment start (trailing whitespace stripped).
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;       # Skip the newline that now sits at off.

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.

        Returns stripped value and the end offset of the terminating ',' or ')'.
        Nested parentheses are skipped.  If no terminator is found the
        returned offset equals len(sCode).
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.

        Returns the list of parameter values and the offset of the closing parentheses.
        Returns (None, len(sCode)) if no closing parentheses was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off      = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                if off >= len(sCode):
                    # The parameter ran to the end of the string without hitting
                    # a ',' or ')', so the list isn't terminated.  Report it the
                    # documented way so the caller can raise a proper error.
                    return (None, len(sCode));
                asParams.append(sParam);
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode.
        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off   += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Account for any braces opened before this closing one.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode (not going beyond offStop). """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character (offStop at most). """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  iLevel is the
        conditional nesting level; the function calls itself with iLevel + 1
        for each branch of a conditional.

        Returns list of McStmt instances.
        Raises ParserException on failure.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                # Slicing rather than indexing: a '/' ending the input must
                # give a parsing error below and not an IndexError.
                ch = sRawCode[off + 1 : off + 2];
                if ch == '/':   # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace separating the statement from the '{'.
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);  # A parser may split one statement into several.

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if not self.isSubstrAt(sRawCode, offBlock1, '{'):
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if not self.isSubstrAt(sRawCode, off, '('):
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if not self.isSubstrAt(sRawCode, off, ')'):
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if not self.isSubstrAt(sRawCode, offBlock2, '{'):
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if not self.isSubstrAt(sRawCode, off, '('):
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if not self.isSubstrAt(sRawCode, off, ')'):
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if not self.isSubstrAt(sRawCode, off, ';'):
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive: runs to the end of the line.
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;
                else:
                    # Note! Not limited by offStop: when decoding a one-line if/else
                    #       body, offStop is the offset of the terminating ';'.
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if self.isSubstrAt(sRawCode, offBlock1, '{'):
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    offAfterElse = off + len('else');
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[offAfterElse : offAfterElse + 1].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, offAfterElse, offStop);
                        if self.isSubstrAt(sRawCode, offBlock2, '{'):
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');


        return aoStmts;


    def decode(self):
        """
        Decodes the block, populating self.aoStmts.
        Returns the statement list.
        Raises ParserException on failure.
        """
        self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;
2388
2389
2390## IEM_MC_XXX -> parser dictionary.
2391# The raw table was generated via the following command
2392# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2393# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2394g_dMcStmtParsers = {
2395 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2396 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2397 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2398 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2399 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2400 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2401 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2402 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2403 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2404 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2405 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2406 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2407 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2408 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2409 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2410 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2411 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2412 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2413 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2414 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2415 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2416 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2417 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2418 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2419 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2420 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2421 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2422 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2423 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2424 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2425 'IEM_MC_ARG': McBlock.parseMcArg,
2426 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2427 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2428 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2429 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2430 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2431 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2432 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2433 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2434 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2435 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2436 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2437 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2438 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2439 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2440 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2441 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2442 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2443 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2444 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2445 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2446 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2447 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2448 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2449 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2450 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2451 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2452 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2453 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2454 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2455 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2456 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2457 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2458 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2459 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2460 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2461 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2462 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2463 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2464 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2465 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2466 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2467 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2468 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2469 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2470 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2471 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2472 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2473 'IEM_MC_END': McBlock.parseMcGeneric,
2474 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2475 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2476 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2477 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2478 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2479 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2480 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2481 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2482 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2483 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2484 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2485 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2486 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2487 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2488 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2489 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2490 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2491 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2492 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2493 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2494 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2495 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2496 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2497 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2498 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2499 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2500 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2501 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2502 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2503 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2504 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2505 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2506 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2507 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2508 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2509 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2510 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2511 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2512 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2513 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2514 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2515 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2516 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2517 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2518 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2519 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2520 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2521 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2522 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2523 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2557 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2558 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2559 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2560 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2561 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2562 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2563 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2564 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2565 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2566 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2567 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2568 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2569 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2570 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2571 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2572 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2573 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2574 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2575 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2576 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2577 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2578 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2579 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2580 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2581 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2582 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2583 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2584 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2585 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2586 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2587 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2588 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2589 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2590 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2591 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2592 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2593 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2594 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2595 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2596 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2597 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2598 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2599 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2600 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2601 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2602 'IEM_MC_MAYBE_RAISE_AESNI_RELATED_XCPT': McBlock.parseMcGeneric,
2603 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2604 'IEM_MC_MAYBE_RAISE_AVX2_RELATED_XCPT': McBlock.parseMcGeneric,
2605 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2606 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2607 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2608 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2609 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT': McBlock.parseMcGeneric,
2610 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_EX': McBlock.parseMcGeneric,
2611 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2612 'IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT': McBlock.parseMcGeneric,
2613 'IEM_MC_MAYBE_RAISE_SHA_RELATED_XCPT': McBlock.parseMcGeneric,
2614 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2615 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2616 'IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT': McBlock.parseMcGeneric,
2617 'IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2618 'IEM_MC_MAYBE_RAISE_SSE41_RELATED_XCPT': McBlock.parseMcGeneric,
2619 'IEM_MC_MAYBE_RAISE_SSE42_RELATED_XCPT': McBlock.parseMcGeneric,
2620 'IEM_MC_MAYBE_RAISE_SSSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2621 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2622 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2623 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2624 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2625 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2626 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2627 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2628 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2629 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2630 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2631 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2632 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2633 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2634 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2635 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2636 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2637 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2638 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2639 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2640 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2641 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2642 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2643 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2644 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2645 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2646 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2647 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2648 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2649 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2650 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2651 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2652 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2653 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2654 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2655 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2656 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2657 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2658 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2659 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2660 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2661 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2662 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2663 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2664 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2665 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2666 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2667 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2668 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2669 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2670 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2671 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2672 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2673 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2674 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2675 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2676 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2677 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2678 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2679 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2680 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2681 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2682 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2683 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2684 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2685 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2686 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2687 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2688 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2689 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2690 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2691 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2692 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2693 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2694 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2695 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2696 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2697 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2698 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2699 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2700 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2701 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2702 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2703 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2704 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2705 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2706 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2707 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2708 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2709 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2710 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2711 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2712 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2713 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2714 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2715 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2716 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2717 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2718 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2719 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2720 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2721 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2722 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2723 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2724 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2725 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2726 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2727 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2728 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2729 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2730 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2731 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2732 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2733 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2734 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2735 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2736 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2737 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2738 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2739 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2740 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2741 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2742 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2743 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2744 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2745 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2746 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2747 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2748 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2758 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2759 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2760 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2761 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2762 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2763 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2764 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2765 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2766 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2767 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2768 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2769 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2770};
2771
## List of microcode blocks.
## Starts out empty; the type comment declares the elements to be McBlock instances.
g_aoMcBlocks = [] # type: list(McBlock)
2774
2775
2776
class ParserException(Exception):
    """
    Exception type used for reporting parser errors.

    The message handed to the constructor becomes the one and only
    exception argument.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2781
2782
2783class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2784 """
2785 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2786 """
2787
2788 ## @name Parser state.
2789 ## @{
2790 kiCode = 0;
2791 kiCommentMulti = 1;
2792 ## @}
2793
class Macro(object):
    """
    A preprocessor macro: name, optional parameter list and replacement body.

    Macros without parameters (asArgs is None or empty) expand to their body
    unmodified.  Macros with parameters get a regular expression (oReArgMatch)
    that locates the parameter names in the body, including any '##' token
    pasting operator directly before or after the name.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName       = sName;   ##< The macro name.
        self.asArgs      = asArgs;  ##< None if simple macro, list of parameters otherwise.
        self.sBody       = sBody;   ##< The macro body, i.e. the replacement text.
        self.iLine       = iLine;   ##< Line number given by the creator (presumably the definition line).
        ## Matches one parameter name together with a surrounding '##' operator
        ## or word boundary.  None when the macro takes no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """
        This is just to make the expanded output a bit prettier.

        Returns True when ch is a whitespace character.
        NOTE(review): the "ch != '('" term cannot change the outcome since a
        whitespace character is never '('.  Left as-is so the expansion output
        stays the same.
        """
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused (kept for debugging purposes).  asArgs must have as
        many entries as the macro has parameters, and be None or empty for a
        macro without parameters (both checked with assertions).

        Returns the expanded body string.

        The substitution is done in a single pass over the original body, so
        that argument values are never rescanned and so that both operands of
        a token paste ('a##b') are replaced.  Searching a partially expanded
        body would miss 'b' in that case, because the '##' has already been
        consumed and there is no word boundary between the value inserted for
        'a' and the name 'b'.
        """
        _ = oParent;

        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs = dict(zip(self.asArgs, asArgs));

        def replaceArg(oMatch):
            """ Returns the replacement text for one parameter occurrence. """
            sBody = oMatch.string;
            sPre  = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';
            # Any '##' operators captured by group 1 and 3 are dropped here,
            # which is what pastes the value onto the neighbouring text.
            return sPre + dArgs[oMatch.group(2)] + sPost;

        return self.oReArgMatch.sub(replaceArg, self.sBody);
2835
2836
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used as prefix in error messages, asLines the
    lines to parse, and sDefaultMap the name of the instruction map (a key
    in g_dInstructionMaps) that instructions without an explicit map are
    assigned to.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;            ##< Current line number, used in messages and for indexing asLines.
    self.iState         = self.kiCode;  ##< One of the kiXxxx parser states.
    self.sComment       = '';
    self.iCommentLine   = 0;            ##< Base for the comment relative line numbers in error messages.
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.sCurFunction   = None  # type: str
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Note! All patterns are raw strings, as escapes like '\s', '\d' and '\['
    #       are invalid escape sequences in ordinary string literals.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');

    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Maps the comment tags to the methods handling them.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags, both the '@optest3' and the '@optest[3]' form.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];     ##< Error messages collected by error(), errorOnLine() and errorComment().
2919
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file
    name and the current line number.
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
2925
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is the comment
    start line (self.iCommentLine) plus iLineInComment.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
2931
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2939
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line (iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2947
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment, i.e. for line
    self.iCommentLine + iLineInComment, in self.asErrors.
    Always returns False.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2955
def printErrors(self):
    """
    Writes all the recorded errors to stderr in one go.
    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
2964
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
2971
def stripComments(self, sLine):
    """
    Replaces each comment matched by self.oReComment with a single space
    and returns the result.

    An error is recorded if a comment start or end marker remains
    afterwards, as that indicates a comment spanning multiple lines.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
2982
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line starting the table, i.e. the one with the table name
    and the '[...]' size specifier.  The table body is read from
    self.asLines starting at self.iLine.

    Returns True if the whole table was parsed and had the expected length,
    otherwise an error is recorded and False returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those two tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.  Going
        # backwards so that inserting and deleting entries does not affect
        # the entries yet to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # One IEMOP_X4(fn) stands for four identical entries.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.  The prefix and
                # opcode byte follow from the position in the table.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Use the first instruction that either matches the opcode
                    # and prefix or hasn't got them set yet (filling them in).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None fit, so register a copy of the first one for this
                    # map, opcode and prefix.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries count too, the table length check depends on it.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3088
def addInstruction(self, iLine = None):
    """
    Creates a new instruction for the given line, defaulting to the
    current one, and adds it to both the global instruction list and the
    list of current instructions.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3097
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and the operands from a string shaped like an IEM
    stats base name, that is 'mnemonic_optype1_optype2'.

    Nothing is done if the instruction has a mnemonic already.  Operands
    are only derived if the instruction has none.

    Returns False if an unknown operand type is encountered (operands
    preceding it have been added by then), otherwise True.
    """
    # Nothing to derive if the mnemonic is known already.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Done if there are no operand words or the operands are given already.
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3113
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    oInstr is the instruction to complete and iLine the line number to
    record as its completion line.  Attributes that are still unset
    (mnemonic, disassembler enum, stats name, function name, maps and
    encoding) are derived from the ones that are set.  The instruction is
    then added to its maps and to the global stats and function indexed
    dictionaries.

    Asserts that the instruction hasn't been completed before.
    Always returns True; a duplicate stats name is recorded as an error.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    # (Placeholder, nothing is done here at present.)
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    # (Placeholder too.)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing,
    # falling back on the function name without the 'iemOp_' part.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or failing
    # that from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types, or
    # failing that from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    # Note! The encoding is left unset if there are operands and the first
    #       one does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction using a stats name keeps the dictionary entry.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3219
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the parser state.

    Each current instruction is handed to doneInstructionOne together with
    the line it was completed on: self.iLine when iLineInComment is None,
    otherwise self.iCommentLine + iLineInComment.  The stub and instruction
    totals are updated, and the comment buffer and current instruction list
    are cleared.  When fEndOfFunction is True the current function name and
    the MC block counter are reset as well.

    Always returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineCompleted = self.iLine;
        else:
            iLineCompleted = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iLineCompleted);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the per-comment state.
    self.sComment    = '';
    self.aoCurInstrs = [];
    if fEndOfFunction:
        #self.debug('%s: sCurFunction=None' % (self.iLine, ));
        self.sCurFunction   = None;
        self.iMcBlockInFunc = 0;
    return True;
3238
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of every current instruction to oValue.

    Unless fOverwrite is True, an instruction is only updated when its
    present attribute value is None.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
3250
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib to oValue for every
    current instruction.

    The list is padded with None entries when it is too short.  Unless
    fOverwrite is True, the entry is only updated when it currently is None.
    """
    for oCurInstr in self.aoCurInstrs:
        aoValues = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None,] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
3262
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, everything after 'Opcode' is recorded as
    the 'sRawOldOpcodes' attribute of the current instructions (via
    setInstrunctionAttrib, so existing values are kept).  Any '0X' prefix is
    normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefix of the rest.
        asOpcodes = [ ('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:] ];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3278
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction for the op-tag being parsed.

    A new instruction is added (at comment line + iTagLine) when there is no
    current one.  The tag count of every current instruction is incremented,
    and the total tagged count is bumped for instructions getting their first
    tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1;
        self.cTotalTagged += 1 if oCurInstr.cOpTags == 1 else 0;
    return self.aoCurInstrs[-1];
3288
3289 @staticmethod
3290 def flattenSections(aasSections):
3291 """
3292 Flattens multiline sections into stripped single strings.
3293 Returns list of strings, on section per string.
3294 """
3295 asRet = [];
3296 for asLines in aasSections:
3297 if asLines:
3298 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3299 return asRet;
3300
3301 @staticmethod
3302 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3303 """
3304 Flattens sections into a simple stripped string with newlines as
3305 section breaks. The final section does not sport a trailing newline.
3306 """
3307 # Typical: One section with a single line.
3308 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3309 return aasSections[0][0].strip();
3310
3311 sRet = '';
3312 for iSection, asLines in enumerate(aasSections):
3313 if asLines:
3314 if iSection > 0:
3315 sRet += sSectionSep;
3316 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3317 return sRet;
3318
3319
3320
3321 ## @name Tag parsers
3322 ## @{
3323
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  The value must be a single sentence of at most 180
    characters; a trailing full stop is added when missing.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot which is either the last character or followed by a
    # space (dots inside words, like 'rex.w', do not end the sentence).
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing value.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3353
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each non-empty section is flattened into one
    string and appended to the description sections of the instruction.

    Always returns True.
    """
    _ = (sTag, iEndLine);
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
3368
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The value must match self.oReMnemonic.  Because of prefixes, groups and
    whatnot, there are times when the value isn't that of an actual
    assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate it and make sure nothing gets overwritten.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,);
        return self.errorComment(iTagLine, sError);
    if oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sNewMnemonic,);
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sNewMnemonic;
    return True;
3391
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, it is taken from entry 1 of the g_kdOpTypes value for the type.

    The 'type' value indicates the operand type and must be a key of
    g_kdOpTypes.

    The operand index is taken from the last character of sTag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # Note! This is the type check, so report it as such rather than as a 'where' problem.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3441
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  Each name must be a key of
    g_dInstructionMaps, and none of them may already be assigned to the
    instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten into a comma separated list, split it up and validate the names.
    asNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asNames) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sName in asNames:
        if sName not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                               % (sTag, sName, ', '.join(g_dInstructionMaps.keys()),));

    # Refuse input that repeats a map which is already assigned.
    for oAssigned in oInstr.aoMaps:
        if oAssigned.sName in asNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oAssigned.sName));

    # Add the maps, complaining about duplicates within the input itself.
    for sName in asNames:
        oNewMap = g_dInstructionMaps[sName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
3476
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower cased; 'n/a' is stored as None, and a two character
    value gets '0x' prepended before being checked with _isValidOpcodeByte.
    Anything but None must be a member of g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # An empty value would otherwise cause an IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3514
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.
    A value is accepted when _isValidOpcodeByte approves of it, or when it
    is '/', '11/' or '!11/' followed by a single reg digit in the range
    0 thru 7 (the ModR/M reg field is three bits wide).

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sRegPrefix in ('/', '11/', '!11/'):
            if    len(sOpcode) == len(sRegPrefix) + 1 \
              and sOpcode.startswith(sRegPrefix) \
              and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3544
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; entry 0 of the value found
    there is what gets stored on the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Store it unless one has already been given.
    if oInstr.sSubOpcode is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sSubOpcode, sNewSubOpcode,);
        return self.errorComment(iTagLine, sError);

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
3571
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  The value is accepted when it is
    a key of g_kdEncodings or g_dInstructionMaps, or when _isValidOpcodeByte
    approves of it.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if    sNewEncoding not in g_kdEncodings \
      and sNewEncoding not in g_dInstructionMaps \
      and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Store it unless one has already been given.
    if oInstr.sEncoding is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sEncoding, sNewEncoding,);
        return self.errorComment(iTagLine, sError);

    oInstr.sEncoding = sNewEncoding;
    return True;
3598
## EFlags tag to Instruction attribute name.
## Used by parseTagOpEFlags to find the instruction attribute which receives
## the list of flags given with each of the EFLAGS tags.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3607
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of keys from g_kdEFlagsMnemonics, or
    the single word 'none' for an empty list.  The resulting list is stored
    in the instruction attribute that self.kdOpFlagToAttr gives for the tag.

    Returns True on success, False (or the result of self.errorComment()) on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without the whitespace surrounding the commas.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Store them unless the tag has already been given.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));

    setattr(oInstr, sAttrib, asFlags);
    return True;
3639
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.  Each value must be a
    key of g_kdHints; the single word 'none' gives an empty list.  Valid
    hints are added to the dHints dictionary of the instruction, duplicates
    are reported but otherwise ignored.

    Returns True on success, False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # Note! split() without a separator never yields values with surrounding
        #       whitespace, so there is no need for retrying with stripped values.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3673
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.
    The value must match self.oReDisEnum.  When more than one word is given
    an error is reported and the first word is used.

    Returns True on success, False (or the result of self.errorComment()) on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
        if len(asValues) == 0:
            return False;

    # Validate the first (and hopefully only) value.
    sNewDisEnum = asValues[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Store it unless one has already been given.
    if oInstr.sDisEnum is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewDisEnum,);
        return self.errorComment(iTagLine, sError);

    oInstr.sDisEnum = sNewDisEnum;
    return True;
3702
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.  The name must be a
    member of g_kdCpuNames.  When more than one word is given an error is
    reported and the first word is used.

    An already set value is never replaced; an attempt to change it to a
    different CPU is reported as an error.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # An empty value would otherwise cause an IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be done before this point, or the overwrite check is pointless.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3732
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.
    The value is a comma or space separated list of keys from g_kdCpuIdFlags;
    the single word 'none' gives an empty list.  Valid flags are appended to
    the asCpuIds list of the instruction, duplicates are reported but
    otherwise ignored.

    Returns True on success, False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Note! split() without a separator never yields values with surrounding
        #       whitespace, so there is no need for retrying with stripped values.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3766
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching self.oReGroupName
    must be given, and the group can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and check that there is exactly one valid name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    (sNewGroup,) = asNames;
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Store it unless one has already been given.
    if oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,);
        return self.errorComment(iTagLine, sError);

    oInstr.sGroup = sNewGroup;
    return True;
3792
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused tag indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid tag indicates the specification is for a currently
    invalid instruction encoding (like UD2).

    The @opinvlstyle tag just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The style must be a key of g_kdInvalidStyles and can only be given once
    per instruction, regardless of which of the three tags is used.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and check that there is exactly one valid style.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));
    (sNewStyle,) = asValues;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Store it unless one has already been given.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    return True;
3830
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  A test is added
    to the aoTests list of the instruction when it parsed without errors,
    otherwise errors are reported via self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs first, then the
        # inputs preceding '->', and finally the selectors preceding '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.
            # Note! Must compare list identity here as the lists are equal
            #       whenever they are both empty.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional, defaulting to the one given by the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed fine, otherwise
        # complain and dump what we ended up with.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
3975
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    An error is reported when the number differs from the number of tests the
    instruction currently has.  The value is then parsed by parseTagOpTest,
    whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits either follow '@optest' directly or are enclosed in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
3991
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another; none of
    the arguments are used.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4003
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    The valid ones are only recorded in the asCopyTests list of the
    instruction here; invalid and duplicate ones are reported and skipped.

    Returns True, or the result of self.errorComment() when there is no value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
        elif self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4032
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.  The instruction
    is added to g_aoOnlyTestInstructions unless already present there.

    See also @optestignore.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag does not take a value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if len(sUnexpected) > 0:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Add the instruction to the only-test list.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);

    return True;
4055
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).  The value must
    be a key of g_kdXcptTypes and can only be given once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and check that there is exactly one valid type.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));
    (sNewType,) = asValues;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Store it unless one has already been given.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sNewType,));

    oInstr.sXcptType = sNewType;
    return True;
4082
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name of the instruction.  Normally the
    name is picked up from the FNIEMOP_XXX macro invocation following the
    description, or generated from the mnemonic and operands.

    The tag is thought to be needed for instructions whose implementation
    does not follow immediately or which are not implemented yet.

    Returns True on success.  If the name fails self.oReFunctionName or a
    name is already set, the problem is reported via self.errorComment() and
    its return value is passed on.
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);
    sName     = self.flattenAllSections(aasSections);

    # The name must have the expected form...
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern));

    # ... and must not replace one that has already been set.
    sPrevious = oCurInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sPrevious, sName,));

    oCurInstr.sFunction = sName;
    return True;
4110
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the instruction.  Normally the
    name is picked up from the IEMOP_MNEMONIC macro invocation, or generated
    from the mnemonic and operands.

    The tag is thought to be needed for instructions whose implementation
    does not follow immediately or which are not implemented yet.

    Returns True on success.  If the name fails self.oReStatsName or a name
    is already set, the problem is reported via self.errorComment() and its
    return value is passed on.
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);
    sName     = self.flattenAllSections(aasSections);

    # The name must have the expected form...
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # ... and must not replace one that has already been set.
    sPrevious = oCurInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sPrevious, sName,));

    oCurInstr.sStats = sName;
    return True;
4138
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions().  If the tag was given a
    value, the problem is reported via self.errorComment() instead and its
    return value is passed on.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4151
4152 ## @}
4153
4154
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    The comment is split into lines, each tag (a line starting with '@')
    collects the text up to the next tag as a list of sections (sections are
    separated by empty lines), and every tag found in self.dTagHandlers is
    dispatched to its handler as handler(sTag, aasSections, iTagLine, iEndLine).
    Text preceding the first tag is collected under the pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    therefore skipped, otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each dropped leading line moves the start of the comment one line down.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # Note: asLines cannot be empty here, the check at the top guarantees
    #       that at least one line has text surviving the stripping.
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0;                        # Number of tags handed to a handler.
    sFlatDefault = None;                # Flattened untagged text, if any.
    sCurTag = '@default';               # Pseudo tag for text before the first real tag.
    iCurTagLine = 0;
    asCurSection = [];
    aasSections = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text belonging to the current tag; an empty line starts a new
            # section unless the current one is still empty.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section (caused by a blank line right
            # before this tag), but always keep at least one section.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is the first word (lower-cased), any text after
            # it on the same line starts the first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the loop above, the "Did you mean ..." hints are
    #               not produced for the last tag of a comment - confirm
    #               whether that is intentional.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4255
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, the following lines of self.asLines (starting at
    index self.iLine) are appended until it is; self.iLine is not modified.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    If the macro name is invalid the problem is reported via self.error()
    and (len(sInvocation), None) is returned, so that callers unpacking the
    result always get a tuple.  If the input ends before the invocation is
    complete, an error is reported and the arguments found so far are
    returned.

    Raises whatever self.raiseError() raises if there is no open parenthesis
    following the name.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # Must not return the self.error() status here, callers do tuple
        # unpacking on the result.  Use the 'nothing found' convention.
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;      # Index of the next line to append when running dry.
    cDepth   = 1;               # Parenthesis nesting depth, 1 = the invocation itself.
    off      = offOpen + 1;
    offStart = off;             # Start of the argument currently being scanned.
    chQuote  = None;            # The quote character while inside a string/char literal.
    while cDepth > 0:
        if off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a literal: skip escaped characters, look for the end quote.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the outermost level delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
4306
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of sMacro is considered, and it only counts
    as an invocation if the next non-blank character is an open parenthesis.
    This keeps e.g. 'IEMOP_MNEMONIC' from matching 'IEMOP_MNEMONIC2EX('.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Note: lstrip + startswith rather than strip()[0], as the latter raises
    #       IndexError when nothing but whitespace follows the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4316
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx which drops the
    offset.

    Returns the argument list (see parseMacroInvocation) if sMacro is
    invoked in sCode, None if it is not.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4322
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one that is invoked in sCode.

    Returns same as findAndParseMacroInvocation, i.e. the argument list of
    the first hit or None if none of the macros are invoked.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
4332
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross checked against what the comment tags
    have specified for the last instruction in self.aoCurInstrs (one is
    added if there is none), and attributes not set yet (mnemonic, operands,
    encoding, stats name, hints) are filled in from the macro.

    sMacro is the macro name and is only used in the error messages.  sStats,
    sForm, sUpper, sLower, sDisHints and sIemHints are the corresponding
    macro parameters, asOperands the list of a_OpN parameter values.  sAsm is
    currently unused.

    Problems are reported via self.error() and processing continues.
    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry #1 of the g_kdOpTypes value is the operand location.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    # g_kdIemForms values as used here: [0] = encoding, [1] = list of operand
    # locations (or None), [2] = sub-opcode string.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note: The VEX/XOP test must compare the str.find() results
                    #       explicitly.  A bare find() is truthy when it returns
                    #       -1 (not found) and falsy for a hit at offset 0, and
                    #       '> 0' misses a form name starting with 'VEX'.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not (sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4472
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a stats name nor an assembly string, so both
    are derived from the lower case mnemonic and the operands before handing
    everything over to workerIemOpMnemonicEx, the result of which is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        # E.g. 'add' + ['Gv', 'Ev'] gives 'add_Gv_Ev' and 'add Gv,Ev'.
        sStats = '_'.join([sLower,] + list(asOperands));
        sAsm   = '%s %s' % (sLower, ','.join(asOperands),);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4482
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Opens a new McBlock for the current function, makes it the current block
    (self.oCurMcBlock) and adds it to g_aoMcBlocks.

    sCode is the code snippet containing the statement, with
    offBeginStatementInCodeStr being the offset of the statement within it
    and offBeginStatementInLine the offset within the source line.

    Returns True.  Calls self.raiseError() if there is no current function
    or if the previous block has not been ended yet.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # A block needs a function to live in and blocks cannot be nested.
    if not self.sCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    oOpenBlock = self.oCurMcBlock;
    if oOpenBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (oOpenBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub)line holding the
    # statement.  Expanded multiline macros put several newlines into sCode,
    # so measure from the one preceding the statement.  When there is none
    # rfind returns -1 and the indent is simply the offset into sCode.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Create and register the new block, then update the counters.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.sCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4511
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Gathers the source lines making up the current MC block, hands them to
    the block via complete() together with the end line number and
    offEndStatementInLine (the offset of the IEM_MC_END statement within the
    line), and clears self.oCurMcBlock.

    Returns True.  Calls self.raiseError() if there is no current block, or
    if IEM_MC_BEGIN is not the first word on its line for blocks spanning
    multiple source lines.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # Complete and discard the current block.
    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there.  We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # Regular block: the line numbers are 1-based, so this slice covers
        # the begin line thru the current (end) line.
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        # Begin and end on the same source line, i.e. the expanded macro case.
        sRawLine = self.asLines[self.iLine - 1];

        # Cut off everything following the (sub)line with the end statement.
        off = sRawLine.find('\n', offEndStatementInLine);
        if off > 0:
            sRawLine = sRawLine[:off + 1];

        # Cut off everything before the (sub)line with the begin statement,
        # and if something precedes IEM_MC_BEGIN on that (sub)line too, start
        # at the begin statement offset instead.
        off = sRawLine.rfind('\n', 0, self.oCurMcBlock.offBeginLine) + 1;
        sRawLine = sRawLine[off:];
        if not sRawLine.strip().startswith('IEM_MC_BEGIN'):
            sRawLine = sRawLine[self.oCurMcBlock.offBeginLine - off:]

        # NOTE(review): when sRawLine ends with a newline, split() yields a
        #               trailing empty string and thus an extra '\n' entry at
        #               the end of the list - confirm this is harmless.
        asLines = [sLine + '\n' for sLine in sRawLine.split('\n')];

    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
4553
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    The following is looked for, in this order:
        - Instruction decoder function definitions (FNIEMOP_DEF and
          friends), which set the current function and are applied to the
          current instructions.  Nothing more is checked after a hit.
        - Worker function definitions (FNIEMOP_DEF_1, FNIEMOP_DEF_2), which
          only set the current function.  Nothing more is checked after a hit.
        - IEMOP_HLP_DONE_VEX_DECODING* invocations.
        - IEMOP_MNEMONIC* invocations.
        - IEM_MC_BEGIN and IEM_MC_END statements.

    sCode is the code to check and offLine the offset of sCode within the
    current source line (used for calculating MC statement offsets).

    Returns True if a function definition or a IEM_MC_ statement was found,
    otherwise False.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # asArgs[0] is the macro name, asArgs[1] the function name.
            self.sCurFunction = asArgs[1];
            #self.debug('%s: sCurFunction=%s' % (self.iLine, self.sCurFunction,));

            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # The stub macros are handled as the end of the function right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.sCurFunction = asArgs[1];
            #self.debug('%s: sCurFunction=%s (%s)' % (self.iLine, self.sCurFunction, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The _L0 variants require the vex_l_zero hint.  Complain if the
            # mnemonic macro seen earlier didn't supply it, and set it.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # The macro lookups only hit when the name is directly followed by an
        # open parenthesis, so the shorter names below do not match the
        # longer ones sharing their prefix.  The asArgs indexes pick out the
        # parameters listed in the comment preceding each lookup, asArgs[0]
        # being the macro name.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                # Only applied when there is exactly one current instruction.
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            # The match offsets are relative to sCode, so offLine is added to
            # get the offset within the source line.
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                else:
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
            return True;

    return False;
4694
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression matches the name of any macro in self.dMacros at
    a word boundary.  For macros taking arguments the name must be followed
    by optional whitespace and an open parenthesis (which is included in the
    match), for the others by a word boundary.  self.oReMacros is set to
    None when there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    # One alternative per macro.  Note the raw strings: '\s' and '\(' are
    # not valid escape sequences in an ordinary string literal.
    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + r'\s*\(');
        else:
            asAlternatives.append(sName + r'\b');

    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
4715
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Only macros with IEM_MC_BEGIN in the body are recorded (in self.dMacros),
    all others are ignored.  Continuation lines are consumed, i.e. self.iLine
    is advanced past them.

    Returns True for ignored macros, the self.error() result for definitions
    that cannot be parsed, and otherwise the result of
    workerPreProcessRecreateMacroRegex.
    """
    # Remember where the definition starts, self.iLine moves on below.
    iLineDefine = self.iLine;

    #
    # Join up lines using line continuation, keeping the newline character
    # but dropping the escaping (and any blanks preceding it).
    #
    while self.iLine < len(self.asLines) and sRest.endswith('\\\n'):
        sRest = '%s\n%s' % (sRest[:-2].rstrip(), self.asLines[self.iLine],);
        self.iLine += 1;

    #
    # Split out the name, parameter list and body.  Try the function-like
    # form first, then fall back on the simple form without parameters.
    #
    oFuncLike = self.oReHashDefine2.match(sRest);
    if oFuncLike:
        sName    = oFuncLike.group(1);
        asParams = [sParam.strip() for sParam in oFuncLike.group(2).split(',')];
        sBody    = oFuncLike.group(3);
    else:
        oSimple = self.oReHashDefine3.match(sRest);
        if not oSimple:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sName    = oSimple.group(1);
        asParams = None;
        sBody    = oSimple.group(2);
    assert sName == sName.strip();

    #
    # Only macros with MC blocks in them are of interest.  MC blocks within
    # nested macro expansion are NOT supported, to avoid lots of extra work.
    #
    if 'IEM_MC_BEGIN' not in sBody:
        return True;

    #
    # Record the macro and refresh the regular expression matching it.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iLineDefine);
    return self.workerPreProcessRecreateMacroRegex();
4765
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro is not one we are tracking, otherwise it is
    removed from self.dMacros and the result of
    workerPreProcessRecreateMacroRegex is returned.
    """
    # Isolate the name, dropping anything from the first slash on (quick
    # comment strip) unless the slash is the very first character.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4784
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive.

    Only #define and #undef are of interest here.  What follows the
    directive word, with a newline appended, is handed to the respective
    worker and the worker's result is returned.

    Returns False if the line is neither a #define nor an #undef.
    """
    for oRegex, fnWorker in ( (self.oReHashDefine, self.workerPreProcessDefine),
                              (self.oReHashUndef,  self.workerPreProcessUndef), ):
        oHit = oRegex.match(sLine);
        if oHit:
            return fnWorker(oHit.group(1) + '\n');
    return False;
4797
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is a match of the self.oReMacros kind on sLine: group 1 is the
    macro name, followed by optional blanks and an open parenthesis if the
    macro takes arguments.

    Returns sLine with the macro invocation replaced by the expansion.

    Calls self.raiseError() if the argument list is not terminated on the
    line or if the argument count does not match the macro definition.

    Note! The argument scanning only tracks parentheses and commas, string
          and character literals get no special treatment.
    """
    #
    # Get our bearings.
    #
    offMatch  = oMatch.start();
    sName     = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    cLevel    = 1;              # Parenthesis nesting level, 1 = the invocation itself.
    offCur    = oMatch.end();   # The match includes the open parenthesis.
    offCurArg = offCur;
    asArgs    = [];
    while True:
        # Report a proper error instead of failing with IndexError if the
        # above assumption doesn't hold.
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Unterminated argument list in %s invocation' % (oMacro.sName,));
        ch = sLine[offCur];
        if ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
        elif ch == ',' and cLevel == 1:
            asArgs.append(sLine[offCurArg:offCur].strip());
            offCurArg = offCur + 1;
        offCur += 1;
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
4854
4855 def parse(self):
4856 """
4857 Parses the given file.
4858 Returns number or errors.
4859 Raises exception on fatal trouble.
4860 """
4861 #self.debug('Parsing %s' % (self.sSrcFile,));
4862
4863 while self.iLine < len(self.asLines):
4864 sLine = self.asLines[self.iLine];
4865 self.iLine += 1;
4866 #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));
4867
4868 # Expand macros we know about if we're currently in code.
4869 if self.iState == self.kiCode and self.oReMacros:
4870 oMatch = self.oReMacros.search(sLine);
4871 if oMatch:
4872 sLine = self.expandMacros(sLine, oMatch);
4873 if self.fDebugPreProc:
4874 self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
4875 self.asLines[self.iLine - 1] = sLine;
4876
4877 # Look for comments.
4878 offSlash = sLine.find('/');
4879 if offSlash >= 0:
4880 if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
4881 offLine = 0;
4882 while offLine < len(sLine):
4883 if self.iState == self.kiCode:
4884 # Look for substantial multiline comment so we pass the following MC as a whole line:
4885 # IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
4886 # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
4887 offHit = sLine.find('/*', offLine);
4888 while offHit >= 0:
4889 offEnd = sLine.find('*/', offHit + 2);
4890 if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
4891 break;
4892 offHit = sLine.find('/*', offEnd);
4893
4894 if offHit >= 0:
4895 self.checkCodeForMacro(sLine[offLine:offHit], offLine);
4896 self.sComment = '';
4897 self.iCommentLine = self.iLine;
4898 self.iState = self.kiCommentMulti;
4899 offLine = offHit + 2;
4900 else:
4901 self.checkCodeForMacro(sLine[offLine:], offLine);
4902 offLine = len(sLine);
4903
4904 elif self.iState == self.kiCommentMulti:
4905 offHit = sLine.find('*/', offLine);
4906 if offHit >= 0:
4907 self.sComment += sLine[offLine:offHit];
4908 self.iState = self.kiCode;
4909 offLine = offHit + 2;
4910 self.parseComment();
4911 else:
4912 self.sComment += sLine[offLine:];
4913 offLine = len(sLine);
4914 else:
4915 assert False;
4916 # C++ line comment.
4917 elif offSlash > 0:
4918 self.checkCodeForMacro(sLine[:offSlash], 0);
4919
4920 # No slash, but append the line if in multi-line comment.
4921 elif self.iState == self.kiCommentMulti:
4922 #self.debug('line %d: multi' % (self.iLine,));
4923 self.sComment += sLine;
4924
4925 # No slash, but check if this is a macro #define or #undef, since we
4926 # need to be able to selectively expand the ones containing MC blocks.
4927 elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
4928 if self.fDebugPreProc:
4929 self.debug('line %d: pre-proc' % (self.iLine,));
4930 self.checkPreProcessorDirectiveForDefineUndef(sLine);
4931
4932 # No slash, but check code line for relevant macro.
4933 elif ( self.iState == self.kiCode
4934 and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
4935 #self.debug('line %d: macro' % (self.iLine,));
4936 self.checkCodeForMacro(sLine, 0);
4937
4938 # If the line is a '}' in the first position, complete the instructions.
4939 elif self.iState == self.kiCode and sLine[0] == '}':
4940 #self.debug('line %d: }' % (self.iLine,));
4941 self.doneInstructions(fEndOfFunction = True);
4942
4943 # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
4944 # so we can check/add @oppfx info from it.
4945 elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
4946 self.parseFunctionTable(sLine);
4947
4948 self.doneInstructions(fEndOfFunction = True);
4949 self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
4950 % (self.cTotalStubs * 100 // self.cTotalInstr, self.cTotalStubs, self.cTotalInstr, self.cTotalMcBlocks,
4951 os.path.basename(self.sSrcFile),));
4952 return self.printErrors();
4953
4954
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Loads one source file and runs the instruction specification parser on it.

    Returns a tuple of the parse() result and the parser instance.
    Raises Exception if the file cannot be opened or read.  A ParserException
    is printed to stderr and then re-raised.
    """
    # Slurp the whole file into a list of lines.
    try:
        oSrc = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrc:
        try:
            asLines = oSrc.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    # Hand the lines to the parser.
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap);
        cErrors = oParser.parse();
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
    return (cErrors, oParser);
4982
4983
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and then in
    g_dAllInstructionsByFunction; the tests of every matching instruction are
    appended to the requesting instruction's aoTests.

    Returns the number of errors, after writing them to stderr.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        for sReference in oTarget.asCopyTests or ():
            # A stats name hit yields a single source, a function name may yield several.
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            aoSources = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sReference, []);
            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    # Copying from oneself makes no sense.
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
5011
5012
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is reset to an empty list on every
    other instruction that has tests.  Always returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5024
## All the main instruction files, each paired with the name of its default
#  instruction map: ( filename, default map name ).
g_aasAllInstrFilesAndDefaultMap = (
    ('IEMAllInstructionsCommon.cpp.h', 'one'),
    ('IEMAllInstructionsOneByte.cpp.h', 'one'),
    ('IEMAllInstructionsTwoByte0f.cpp.h', 'two0f'),
    ('IEMAllInstructionsThree0f38.cpp.h', 'three0f38'),
    ('IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a'),
    ('IEMAllInstructionsVexMap1.cpp.h', 'vexmap1'),
    ('IEMAllInstructionsVexMap2.cpp.h', 'vexmap2'),
    ('IEMAllInstructionsVexMap3.cpp.h', 'vexmap3'),
    ('IEMAllInstructions3DNow.cpp.h', '3dnow'),
);
5037
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs.
    A bare filename that does not exist relative to the current directory is
    looked up in the directory containing this script instead.

    After parsing, the @opcopytests requests are executed and the only-test
    filter applied, and the overall stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure (including a non-zero total error count).
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        # Only redirect names without a directory component that aren't found as-is.
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  max(.., 1) avoids a ZeroDivisionError when no
    # instructions were collected at all (e.g. an empty file selection).
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    cTotalInstr = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5068
5069
def parseFiles(asFiles):
    """
    Parses the given subset of the IEMAllInstruction*.cpp.h files.

    Each file is classified by comparing its base name, ignoring case, with
    the names in g_aasAllInstrFilesAndDefaultMap to find its default map.

    Returns a list of the parsers on success.
    Raises exception on failure, including for files that cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseLower = os.path.split(sFilename)[1].lower();
        # First table entry with a matching name wins.
        sDefaultMap = next((asEntry[1] for asEntry in g_aasAllInstrFilesAndDefaultMap
                            if asEntry[0].lower() == sBaseLower), None);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sDefaultMap));

    # The worker does the actual parsing.
    return __parseFilesWorker(asFilesAndDefaultMap);
5091
5092
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
5101
5102
5103#
5104# Generators (may perhaps move later).
5105#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats one disassembler table entry for the given instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  The instruction's aoOperands,
    sMnemonic, sEncoding, sDisEnum and dHints attributes are used, together
    with the g_kdOpTypes, g_kdHints and g_dInstructionMaps tables.

    Returns the entry as a single string with the fields padded out to fixed
    column offsets.
    """
    sMacro = 'OP';
    cMaxOperands = 3;
    if len(oInstr.aoOperands) > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    # Mnemonic, a space, then the comma separated operand format snippets
    # (element 2 of the g_kdOpTypes entry).
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    for iOperand, oOperand in enumerate(oInstr.aoOperands):
        sTmp += ' ' if iOperand == 0 else ',';
        if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
            sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
        else:
            sTmp += g_kdOpTypes[oOperand.sType][2];
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    #
    # iStart marks where the parser index columns begin, so that
    # len(asColumns) - iStart is the number of them added so far.
    #
    iStart = len(asColumns);
    if oInstr.sEncoding is None:
        pass;
    elif oInstr.sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding in [ 'prefix', ]:
        # One zero parser index per operand.
        for oOperand in oInstr.aoOperands:
            asColumns.append('0,');
    elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif oInstr.sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif oInstr.sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif oInstr.sEncoding in g_dInstructionMaps:
        # The encoding names another instruction map; use that map's parser.
        asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands.
    # The operands already covered by a column added above are skipped, and
    # the parser index columns are then padded with zeros up to cMaxOperands.
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        sIdx = g_kdOpTypes[oOperand.sType][0];
        #if sIdx != 'IDX_UseModRM':
        asColumns.append(sIdx + ',');
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    # One OP_PARM_xxx column per operand (element 3 of the g_kdOpTypes
    # entry), padded with OP_PARM_NONE up to cMaxOperands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iStart = len(asColumns)
    for oOperand in oInstr.aoOperands:
        asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Flags.
    #
    # Only hints mapping to DISOPTYPE_xxx defines are included, OR'ed
    # together in sorted hint name order; '0' if there are none.  This
    # column also closes the macro invocation.
    #
    sTmp = '';
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            if sTmp:
                sTmp += ' | ' + sDefine;
            else:
                sTmp += sDefine;
    if sTmp:
        sTmp += '),';
    else:
        sTmp += '0),';
    asColumns.append(sTmp);

    #
    # Format the columns into a line.
    #
    # Each column is padded out to its offset in aoffColumns; if the line is
    # already at or past that offset, a single space separates the columns.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for i, s in enumerate(asColumns):
        if len(sLine) < aoffColumns[i]:
            sLine += ' ' * (aoffColumns[i] - len(sLine));
        else:
            sLine += ' ';
        sLine += s;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5230
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether leading and trailing None entries should be trimmed off
    the table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the index following the last populated entry.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the first populated entry (ends up at iEnd if there are none).
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Go for the short table when the number of trimmed entries is at least
    # a 30th of the table size (integer division).
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Otherwise the full table is output.
    return (0, cEntries, False);
5252
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and then writes, for each instruction
    map whose name starts with 'vex', a DISOPCODE table followed by a
    DISOPMAPDESC range record to oDstFile.  Maps using the 'byte+pfx'
    selector are first split into one map per prefix (none, 0x66, 0xf3 and
    0xf2).  Debug and error messages go to stderr.

    Returns exit code: 0 on success, 1 if parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by byte and prefix are split into one map per prefix.
    # (NOTE(review): the original comment was cut off mid-sentence; confirm
    # the intended rationale.)
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCur: oCur.sEncoding + ''.join(oCur.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # NOTE(review): asHeaderLines is collected but never written anywhere in
    # this function.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow = 0x10 * cEntriesPerByte; # Entries per 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets its start offset noted.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                # Row header, with a blank line between rows.
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries; the '+= 1' below completes the skip.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip exactly the
                        # entries the block macro covers (this used to be a hard-coded 3,
                        # which is only right for four entries per byte).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this passes an empty list to the entry formatter,
                    # which reads instruction attributes from its argument - confirm
                    # the intended handling of empty lists.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # C doesn't allow empty initializers, so give empty tables a dummy entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        #
        # NOTE(review): the extern declaration appends 'Range' to the range name
        # while the definition below does not - verify against getDisasRangeName().
        #
        asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
5381
if __name__ == '__main__':
    # Run as a script: generate the tables on stdout and exit with the
    # generator's return value as the process exit code.
    raise SystemExit(generateDisassemblerTables());
5384
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette