VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 99338

Last change on this file since 99338 was 99338, checked in by vboxsync, 23 months ago

VMM/IEM: IEM_MC_MAYBE_RAISE_SSSE3_RELATED_XCPT -> IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT, since the CPUID check was removed they are identical. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 253.6 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 99338 2023-04-07 12:35:06Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 99338 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 hacks:
# Python 3 is detected via the major version number; the name 'long' is then
# bound to int so that later references to 'long' resolve to int.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## EFLAGS constants: X86_EFL_XXX name -> 32-bit mask value.
## Single-bit flags are written as shifts so the bit number is visible.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,     # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps a flag mnemonic to an X86_EFL_XXX constant name; a leading '!' on the
## value marks the mnemonic as meaning "flag clear".
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    # NOTE(review): 'po' is mapped to PF set and 'pe' to PF clear; presumably
    #               this follows the debugger's notation - confirm the polarity.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0 (X86_CR0_XXX name -> 32-bit mask).
## Every entry is a single bit, written as a shift by its bit number.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,
    'X86_CR0_MP':           1 << 1,
    'X86_CR0_EM':           1 << 2,
    'X86_CR0_TS':           1 << 3,
    'X86_CR0_ET':           1 << 4,
    'X86_CR0_NE':           1 << 5,
    'X86_CR0_WP':           1 << 16,
    'X86_CR0_AM':           1 << 18,
    'X86_CR0_NW':           1 << 29,
    'X86_CR0_CD':           1 << 30,
    'X86_CR0_PG':           1 << 31,
};
152
## Constants and values for CR4 (X86_CR4_XXX name -> 32-bit mask).
## Every entry is a single bit, written as a shift by its bit number.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
174
## XSAVE components (XCR0): XSAVE_C_XXX name -> mask value.
## Single components are written as shifts; the two combined masks stay literal.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4,     # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6,     # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## Each location name maps to its own (initially empty) list.
g_kdOpLocations = dict(
    (sLocation, [])
    for sLocation in (
        'reg',          ## modrm.reg
        'rm',           ## modrm.rm
        'imm',          ## immediate instruction data
        'vvvv',         ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rDX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
);
214
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! In the entries below, keys carrying a _WO/_RO/_RW suffix share fields
##       0 thru 2 with the unsuffixed type name.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    # The flags register (location 'rFLAGS').
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding name (the names used here are keys of g_kdEncodings).
##    - 1: list of operand locations in operand order (None for 'FIXED').
##    - 2: the opcodesub value ('' when none).
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]                opcodesub       ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],                '',             ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],                '11 mr/reg',    ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],                '!11 mr/reg',   ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],         '',             ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],         '11 mr/reg',    ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],         '!11 mr/reg',   ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],                '',             ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],                '11 mr/reg',    ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],                '!11 mr/reg',   ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],         '',             ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],         '11 mr/reg',    ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],         '!11 mr/reg',   ),
    # NOTE(review): unlike the other _REG/_MEM forms, M_REG and M_MEM (and the
    #               VEX_M_REG/VEX_M_MEM forms below) have an empty opcodesub -
    #               presumably intentional, confirm.
    'M':            ( 'ModR/M',     [ 'rm', ],                      '',             ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                      '',             ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                      '',             ),
    'R':            ( 'ModR/M',     [ 'reg', ],                     '',             ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],                '',             ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                '11 mr/reg',    ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                '!11 mr/reg',   ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],                '',             ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                '11 mr/reg',    ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                '!11 mr/reg',   ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                      ''              ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                      ''              ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                      ''              ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                     ''              ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],        '',             ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],        '11 mr/reg',    ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],        '!11 mr/reg',   ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],        '',             ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],        '11 mr/reg',    ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],        '!11 mr/reg',   ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],         '',             ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],         '11 mr/reg',    ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],         '!11 mr/reg',   ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],        '',             ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],        '11 mr/reg',    ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],        '!11 mr/reg',   ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],               '',             ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],               '11 mr/reg',    ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],               '!11 mr/reg',   ),

    'FIXED':        ( 'fixed',      None,                           '',             ),
};
417
## \@oppfx values.
## The keys are the valid values; every value is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## The keys are the valid values; every value is an empty list.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Note! Apart from the aliases (and 'none', which has no real value), each
##       key's first value is the key itself.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    'vex.l=1':              [ 'vex.l=1',            'L1',       ], # Was 'vex.l=0' (copy & paste slip).
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
};
456
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX constant name
## (None for 'prefix').
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the valid values; every value is an empty list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## CPU names; every value is an empty tuple.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
483
## \@opcpuid
## Maps the CPUID feature name used in the tag to the corresponding
## X86_CPUID_XXX feature bit constant name.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next entries used to name the constant of the *following* ECX bit
    # (pclmul -> DTES64, monitor -> CPLDS, smx -> TM2); each now names the
    # constant of its own feature bit.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name; hints with an empty
## string value have no DISOPTYPE_XXX constant associated with them here.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the valid values; every value is an empty list.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
608
609
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is a lower case hex opcode byte of the form '0xNN'.

    Exactly four characters are required: the '0x' prefix followed by two
    digits out of 0-9 and a-f (upper case digits are rejected).

    Returns True if valid, False if not.
    """
    sHexDigits = '0123456789abcdef';
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    if sOpcode[2] not in sHexDigits:
        return False;
    return sOpcode[3] in sHexDigits;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Derives the disassembler table name from the map name (self.sName).

    The map name is split on underscores and each word is appended to the
    'g_aDisas' stem:
        - 'm' and 'r' (the modrm.mod==mem / modrm.mod==reg suffixes) and
          two-digit lowercase hex words keep their underscore and spelling,
        - any other word gets 'grp' -> 'Grp' and 'map' -> 'Map' substituted
          and its first character uppercased, with no separator.

    Example: 'grp11_c6_m' gives 'g_aDisasGrp11_c6_m'.
    """
    asParts = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        fKeepVerbatim = sPart in ('m', 'r') \
                     or (len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart));
        if fKeepVerbatim:
            asParts.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asParts);
805
def getDisasRangeName(self):
    """
    Derives the disassembler table range name for this map: the table name
    from getDisasTableName() with 'g_aDisas' replaced by 'g_Disas' and
    'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sStem      = sTableName.replace('g_aDisas', 'g_Disas');
    return sStem + 'Range';
811
def isVexMap(self):
    """ Returns True if the map encoding (self.sEncoding) starts with 'vex', i.e. this is a VEX map. """
    fIsVex = self.sEncoding.startswith('vex');
    return fIsVex;
815
816
class TestType(object):
    """
    Test value type.

    This base class handles integer-like values.  The fUnsigned constructor
    parameter gives the default stance on zero vs sign extension; a value
    prefixed with '+' or '-' is always flagged for sign extension, whatever
    fUnsigned says.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;
        ## The normal value sizes in bytes, smallest first.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];

    class BadValue(Exception):
        """ Raised when a value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## Maps each lowercase hex digit to its bitwise inverse (ascii ~ operator).
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second is what to OR with.  This base
        implementation always returns the single entry form.  The bytearray
        holds the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts the '0x' prefix by one.
        fExplicitSign = sValue[0] in ('-', '+');
        fSignExtend   = fExplicitSign or not self.fUnsigned;
        offPrefix     = 1 if fExplicitSign else 0;
        fHex          = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits of the magnitude.  Negative values are converted
        # from -n - 1 (which equals ~n) by inverting every digit afterwards.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';     # Python 2 long suffix.
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[chDigit] for chDigit in sHex]);
            # Make sure the top bit is set so the value stays negative.
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex;

        # Find the natural width: the first normal size that fits, or else a
        # multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad to the natural width with the sign digit.
        cPadding = cNaturalDigits - cDigits;
        if cPadding != 0:
            sHex = ('f' if fNegative else '0') * cPadding + sHex;

        # Walk the digit pairs from the end to get little endian byte order.
        abValue = bytearray(int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair; never the case for plain integers.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic whose constant starts with '!' names a
    flag to clear, any other one a flag to set.
    """

    ## Mnemonics for the zero (cleared) state of a flag.  Only the keys matter.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated flag mnemonic list into byte values.

        Returns the single entry form of TestType.get() holding the flags to
        set when no flag is to be cleared.  Otherwise returns a pair with the
        clear mask first and the set mask second.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue names at least one zero-value flag, which makes
        get() return an AND+OR pair.

        The value is split on commas exactly like get() does and each
        mnemonic is matched as a whole.  A plain substring search would also
        fire on any longer mnemonic that merely contains one of the
        two-letter names, and then trip the consistency assertion in get().
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
978
class TestTypeFromDict(TestType):
    """
    Value parsing for fields whose bits are named by a dictionary of
    constants.  In this file it is instantiated for CR0, CR4 and XCR0.
    """

    ## Not referenced by this class itself.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        ## Prefix put in front of the uppercased flag name to form the constant name.
        self.sConstantPrefix      = sConstantPrefix;
        ## Constant name -> integer value.
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names into the byte value of
        all the named constants ORed together.

        Returns the single entry form of TestType.get().
        Raises BadValue if a flag name has no matching constant.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        sHexValue = '0x%x' % (fCombined,);
        return TestType.get(self, sHexValue);
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought of as values to modify BS3REGCTX and extended
    (needs to be structured) state.

    An instance records which field is modified (sField, a kdFields key),
    how (sOp, one of kasOperators), with what value string (sValue) and the
    name of the value type (sType).
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is created with the default fUnsigned=True and thus
    #               behaves like 'uint' - confirm whether fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates the modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be strings.  Violations trigger assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test: a variable, a compare operator and
    the value to compare the variable with.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## Each one translates into a '<variable><op><value>' expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bit count.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates the selector.  Asserts that sVariable is a kdVariables key,
        that sOp is in kasCompareOps and that sValue is valid for sVariable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand, described by where it is found and what type it has.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known keys of the respective global tables.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything collected about one instruction: core attributes,
    implementation details, where it was decoded from and raw input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent            = None      # type: Instruction
        self.sMnemonic          = None;
        self.sBrief             = None;
        self.asDescSections     = []        # type: list(str)
        self.aoMaps             = []        # type: list(InstructionMap)
        self.aoOperands         = []        # type: list(Operand)
        self.sPrefix            = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode            = None      # type: str
        self.sSubOpcode         = None      # type: str
        self.sEncoding          = None;
        self.asFlTest           = None;
        self.asFlModify         = None;
        self.asFlUndefined      = None;
        self.asFlSet            = None;
        self.asFlClear          = None;
        self.dHints             = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum           = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds           = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures      = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests            = []        # type: list(InstructionTest)
        self.sMinCpu            = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr           = None;     ##< Some CPU restriction expression...
        self.sGroup             = None;
        self.fUnused            = False;    ##< Unused instruction.
        self.fInvalid           = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle      = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType          = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats             = None;
        self.sFunction          = None;
        self.fStub              = False;
        self.fUdStub            = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile           = sSrcFile;
        self.iLineCreated       = iLine;
        self.iLineCompleted     = None;
        self.cOpTags            = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo        = None;
        self.asRawDisParams     = [];
        self.sRawIemOpFlags     = None;
        self.sRawOldOpcodes     = None;
        self.asCopyTests        = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs.  Fields
        whose value is None are left out, as are the optional fields that
        are empty, False or at their -1 default.  The result is wrapped in
        angle brackets when fRepr is True.
        """
        # Collect (name, value) pairs in output order.
        aoPairs = [ ('opcode', self.sOpcode), ];
        if self.sPrefix:
            aoPairs.append(('prefix', self.sPrefix));
        aoPairs.append(('mnemonic', self.sMnemonic));
        for iOperand, oOperand in enumerate(self.aoOperands, 1):
            aoPairs.append(('op%u' % (iOperand,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)));
        if self.aoMaps:
            aoPairs.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        aoPairs.append(('encoding', self.sEncoding));
        if self.dHints:
            aoPairs.append(('hints', ','.join(self.dHints.keys())));
        aoPairs.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            aoPairs.append(('cpuid', ','.join(self.asCpuIds)));
        aoPairs.append(('group', self.sGroup));
        if self.fUnused:
            aoPairs.append(('unused', 'True'));
        if self.fInvalid:
            aoPairs.append(('invalid', 'True'));
        aoPairs.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            aoPairs.append(('fStub', 'True'));
        if self.fUdStub:
            aoPairs.append(('fUdStub', 'True'));
        if self.cOpTags:
            aoPairs.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            aoPairs.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            aoPairs.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        # Format the pairs that have a value.
        sBody = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in aoPairs if oValue is not None]);
        return '<' + sBody + '>' if fRepr else sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as oParent.  Any of oMap, sOpcode,
        sSubOpcode and sPrefix that is given (and not empty) replaces the
        corresponding value in the copy.  List and dictionary attributes are
        copied shallowly, so the contained objects are shared; the asFlXxx
        attributes are shared by reference.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # The values the caller may override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode    = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Attributes taken over as-is.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                      'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists get a new list object with the same elements.  (Deeper copy for
        # aoOperands and aoTests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures',
                      'aoTests', 'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));

        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec = str(self.sOpcode); # pylint type confusion workaround.
        cchSpec = len(sSpec);

        # Full hex byte form.
        if sSpec[:2] == '0x':
            return int(sSpec, 16);

        # The /r form: the digit goes into bits 5:3.
        if cchSpec == 2 and sSpec[0] == '/' and sSpec[1].isdigit():
            return int(sSpec[1:]) << 3;

        # The 11/r form: as above with the two top bits set.
        if cchSpec == 4 and sSpec.startswith('11/') and sSpec[-1].isdigit():
            return (int(sSpec[-1:]) << 3) | 0xc0;

        # The !11/r form (returns a byte with the two top bits set to 10b, i.e. 0x80):
        ## @todo this doesn't really work...
        if cchSpec == 5 and sSpec.startswith('!11/') and sSpec[-1].isdigit():
            return (int(sSpec[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (None or empty gives 0).
        """
        fMask = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## All the instructions.
g_aoAllInstructions = []                # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}           # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}       # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []           # type: list(Instruction)

## Instruction maps.
g_aoInstructionMaps = [
    # One byte opcode map and the groups hanging off it.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80'], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81'], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82'], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83'], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f'], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0'], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1'], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0'], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1'], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2'], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3'], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6'], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7'], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe'], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff'], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6'], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6'], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7'], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7'], sSelector = '11'), # xbegin

    # Two byte (0x0f) opcode map and the groups hanging off it.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f'], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00'], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01'], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01'], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba'], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7'], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9'], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71'], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72'], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73'], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae'], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18'], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78'], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d'], sSelector = '/r'), # AMD: prefetch

    # Three byte opcode maps.
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38']),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a']),

    # VEX maps and groups.
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f']),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## Instruction maps indexed by their sName attribute.
g_dInstructionMaps = {
    oMap.sName: oMap
    for oMap in g_aoInstructionMaps
};
## Instruction maps indexed by their sIemName attribute.
g_dInstructionMapsByIemName = {
    oMap.sIemName: oMap
    for oMap in g_aoInstructionMaps
};
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function is located and the raw lines it is made
    up of.  This is mainly used for scoped searches for variables used in
    microcode blocks.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        # Location of the function.
        self.sSrcFile   = sSrcFile;     ##< The source file the function is defined in.
        self.iBeginLine = iBeginLine;   ##< The start line.
        self.iEndLine   = -1;           ##< The line the function (probably) ends on, -1 until complete() is called.
        # What the function is.
        self.sName      = sName;        ##< The function name.
        self.asDefArgs  = asDefArgs;    ##< The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asLines    = []            # type: list(str) ##< The raw lines the function is made up of.

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once per instance (asserted via iEndLine).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    Statement in a microcode block.
    """

    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters.
        self.oUser    = None;       ##< Initialized to None; not touched by this class.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement.

        Returns a single 'name(param, ...);' line indented by cchIndent spaces
        and terminated by a newline.
        """
        return '%s%s(%s);\n' % (' ' * cchIndent, self.sName, ', '.join(self.asParams),);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, returning the concatenated code.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list with any of the given names,
        None if there is no such statement.

        Conditional statements (McStmtCond) are searched recursively, first
        the if-branch and then the else-branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if isinstance(oStmt, McStmtCond):
                oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                        or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
                if oHit:
                    return oHit;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. whether the name starts with 'C++'. """
        return self.sName.startswith('C++');
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Holds the statements of the if-branch and of the (possibly empty)
    else-branch.
    """

    def __init__(self, sName, asParams):
        McStmt.__init__(self, sName, asParams);
        self.aoIfBranch   = [];     ##< Statements in the if-branch.
        self.aoElseBranch = [];     ##< Statements in the else-branch, empty if there is none.

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches.

        The IEM_MC_ELSE() part is only emitted when the else-branch is not
        empty.  Branch statements are indented four spaces more than the
        conditional itself.
        """
        sIndent = ' ' * cchIndent;
        asRet = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC_ELSE() {\n');
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asRet);
1838
class McStmtVar(McStmt):
    """
    Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST.
    """

    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        self.sType       = sType;           ##< The variable type.
        self.sVarName    = sVarName;        ##< The variable name.
        self.sConstValue = sConstValue;     ##< None if not const.
1846
class McStmtArg(McStmtVar):
    """
    Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST,
    IEM_MC_ARG_LOCAL_REF.
    """

    ## The sRefType values accepted by the constructor.
    kasValidRefTypes = ('none', 'local');

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        self.iArg     = iArg;       ##< The argument number.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """
    Call statement: IEM_MC_CALL_*
    """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index into asParams of the function being called.
        self.idxParams = iFnParam + 1;          ##< Index of the parameter following the function.
        self.sFn       = asParams[iFnParam];    ##< The function being called.
        ## The asParams entry selected by iRcNameParam, None if that index is negative.
        if iRcNameParam < 0:
            self.iRcName = None;
        else:
            self.iRcName = asParams[iRcNameParam];
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode, sName = 'C++'):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode = fDecode;     ##< Selects the '// C++ decode' annotation over '// C++ normal' when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement with a trailing annotation comment.

        The annotation is put in front of every newline in the code, so a
        multi-line statement gets it on each line.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode       = ' ' * cchIndent + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1881
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode):
        McStmtCond.__init__(self, 'C++/if', [sCode,]);
        self.fDecode = fDecode;     ##< Selects the '// C++ decode' annotation over '// C++ normal' when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces around both branches.

        The else part is only emitted when the else-branch is not empty.
        """
        sIndent     = ' ' * cchIndent;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asRet);
1902
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """

    def __init__(self, sCode):
        super(McCppPreProc, self).__init__(sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive followed by a newline.

        The directive is emitted as-is; cchIndent is ignored and no annotation
        comment is added.
        """
        _ = cchIndent;
        return '%s\n' % (self.asParams[0],);
1912
1913
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    Besides recording where the block was found, the class holds the decoder
    (decodeCode) that turns the raw block text into McStmt objects, the
    statement parser callbacks it dispatches to (parseMcXxx), and a handful
    of low-level text scanning helpers.
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
        self.sSrcFile     = sSrcFile;           ##< The source file containing the block.
        self.iBeginLine   = iBeginLine;         ##< The line with the IEM_MC_BEGIN statement.
        self.offBeginLine = offBeginLine;       ##< The offset of the IEM_MC_BEGIN statement within the line.
        self.iEndLine     = -1;                 ##< The line with the IEM_MC_END statement.
        self.offEndLine   = 0;                  ##< The offset of the IEM_MC_END statement within the line.
        self.oFunction    = oFunction;          ##< The function the block resides in.
        self.sFunction    = oFunction.sName;    ##< The name of the function the block resides in. DEPRECATED.
        self.iInFunction  = iInFunction;        ##< The block number within the function.
        ## The block indentation.
        # Note! Falls back on offBeginLine when cchIndent is None, but also when it is zero.
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        self.asLines      = []                  # type: list(str) ##< The raw lines the block is made up of.
        ## Decoded statements in the block.
        self.aoStmts      = []                  # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block by recording where it ends and the raw
        lines it is made up of.

        May only be called once per instance (asserted via iEndLine).
        """
        assert self.iEndLine == -1;
        self.iEndLine   = iEndLine;
        self.offEndLine = offEndLine;
        self.asLines    = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error (ParserException).

        The line and column numbers in the message are worked out from off by
        looking at the newlines preceding it in sRawCode, the line number
        being relative to self.iBeginLine.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (ParserException) for the statement sName. """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising an error (ParserException) if it
        doesn't match.  Returns True otherwise.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parser callbacks.
    #
    # These are static methods taking the McBlock instance as an explicit
    # first argument (oSelf), as decodeCode() looks them up in
    # g_dMcStmtParsers and calls them as fnParser(self, sName, asParams).
    # They return either a single statement or a tuple/list of statements.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN - expects two parameters. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local variable
        name, argument number.

        Returns two statements (see note in the code).
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL and IEM_MC_ARG_LOCAL_REF.
        return (
            McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AIMPL_3|4

        The argument count is taken from the last character of the statement
        name.  The first parameter is passed on to McStmtCall as the rc name,
        the second as the function.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_VOID_AIMPL_2|3 - the first parameter is the function, argument count from the name. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AVX_AIMPL_2|3 - the first parameter is the function, argument count from the name. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_FPU_AIMPL_1|2|3 - the first parameter is the function, argument count from the name. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_MMX_AIMPL_2|3 - the first parameter is the function, argument count from the name. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_SSE_AIMPL_2|3 - the first parameter is the function, argument count from the name. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 - the first parameter is the function, argument count from the name. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    #
    # Text scanning helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with comments removed.

        C++ comments are removed up to, but not including, the newline.  C
        comments are replaced by a single space unless there already is
        whitespace on either side of them.  An unterminated comment causes
        the rest of the string to be dropped and the remainder to be right
        stripped.

        Note! String literals are not taken into account.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;   # Skip the newline which is now at off.

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.

        Returns stripped value and the end offset of the terminating ',' or ')'.
        The offset equals len(sCode) if no terminator was found.  Commas and
        parentheses nested inside parentheses do not terminate the parameter.
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.

        Returns the list of parameter values and the offset of the closing
        parenthesis.
        Returns (None, len(sCode)) if no closing parenthesis was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                asParams.append(sParam);
                if off >= len(sCode):
                    # The parameter ran to the end of the string without a
                    # terminating ',' or ')'.  Report it the documented way so
                    # the caller can raise a proper parsing error.
                    break;
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode, not searching
        beyond offStop.

        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Account for braces opened since the last position before closing one.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode, not going beyond offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character (offStop at most). """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the start of C/C++ control statements (if, else, while, for, do).
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to a number of 'iem.s.' members.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  The iLevel parameter is
        the nesting level, it is incremented for each recursive call made to
        decode the branches of a conditional.

        Returns list of McStmt instances.
        Raises ParserException on failure.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                ch = sRawCode[off + 1];
                if ch == '/':   # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace in front of the '{' so the statement text ends
                    # with the closing parenthesis.
                    # Note! This expects at least one whitespace character between the ')' and the '{'.
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive: It runs to the end of the line, trailing spaces excluded.
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;

    def decode(self):
        """
        Decodes the block, populating self.aoStmts.

        The raw code is the concatenation of self.asLines.

        Returns the statement list.
        Raises ParserException on failure.
        """
        self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;
2436
2437
2438## IEM_MC_XXX -> parser dictionary.
2439# The raw table was generated via the following command
2440# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2441# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2442g_dMcStmtParsers = {
2443 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2444 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2445 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2446 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2447 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2448 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2449 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2450 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2451 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2452 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2453 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2454 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2455 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2456 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2457 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2458 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2459 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2460 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2461 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2462 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2463 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2464 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2465 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2466 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2467 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2468 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2469 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2470 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2471 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2472 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2473 'IEM_MC_ARG': McBlock.parseMcArg,
2474 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2475 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2476 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2477 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2478 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2479 'IEM_MC_ASSIGN_U8_SX_U64': McBlock.parseMcGeneric,
2480 'IEM_MC_ASSIGN_U32_SX_U64': McBlock.parseMcGeneric,
2481 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2482 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2483 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2484 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2485 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2486 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2487 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2488 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2489 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2490 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2491 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2492 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2493 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2494 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2495 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2496 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2497 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2498 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2499 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2500 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2501 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2502 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2503 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2504 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2505 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2506 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2507 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2508 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2509 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2510 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2511 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2512 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2513 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2514 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2515 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2516 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2517 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2518 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2519 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2520 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2521 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2522 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2523 'IEM_MC_END': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2557 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2558 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2559 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2560 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2561 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2562 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2563 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2564 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2565 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2566 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2567 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2568 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2569 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2570 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2571 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2572 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2573 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2574 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2575 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2576 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2577 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2578 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2579 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2580 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2581 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2582 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2583 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2584 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2585 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2586 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2587 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2588 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2589 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2590 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2591 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2592 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2593 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2594 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2595 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2596 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2597 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2598 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2599 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2600 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2601 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2602 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2603 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2604 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2605 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2606 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2607 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2608 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2609 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2610 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2611 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2612 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2613 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2614 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2615 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2616 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2617 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2618 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2619 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2620 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2621 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2622 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2623 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2624 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2625 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2626 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2627 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2628 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2629 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2630 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2631 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2632 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2633 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2634 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2635 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2636 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2637 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2638 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2639 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2640 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2641 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2642 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2643 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2644 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2645 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2646 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2647 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2648 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2649 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2650 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2651 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2652 'IEM_MC_MAYBE_RAISE_AESNI_RELATED_XCPT': McBlock.parseMcGeneric,
2653 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2654 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2655 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2656 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2657 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2658 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2659 'IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT': McBlock.parseMcGeneric,
2660 'IEM_MC_MAYBE_RAISE_SHA_RELATED_XCPT': McBlock.parseMcGeneric,
2661 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2662 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2663 'IEM_MC_MAYBE_RAISE_SSE41_RELATED_XCPT': McBlock.parseMcGeneric,
2664 'IEM_MC_MAYBE_RAISE_SSE42_RELATED_XCPT': McBlock.parseMcGeneric,
2665 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2666 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2667 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2668 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2669 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2670 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2671 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2672 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2673 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2674 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2675 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2676 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2677 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2678 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2679 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2680 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2681 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2682 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2683 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2684 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2685 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2686 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2687 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2688 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2689 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2690 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2691 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2692 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2693 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2694 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2695 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2696 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2697 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2698 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2699 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2700 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2701 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2702 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2703 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2704 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2705 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2706 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2707 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2708 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2709 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2710 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2711 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2712 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2713 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2714 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2715 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2716 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2717 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2718 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2719 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2720 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2721 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2722 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2723 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2724 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2725 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2726 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2727 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2728 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2729 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2730 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2731 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2732 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2733 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2734 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2735 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2736 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2737 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2738 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2739 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2740 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2741 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2742 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2743 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2744 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2745 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2746 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2747 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2748 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2758 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2759 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2760 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2761 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2762 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2763 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2764 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2765 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2766 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2767 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2768 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2769 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2770 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2771 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2772 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2773 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2774 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2775 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2776 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2777 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2778 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2779 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2780 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2781 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2782 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2783 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2784 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2785 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2786 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2787 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2788 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2789 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2790 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2791 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2792 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2793 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2794 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2795 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2796 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2797 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2798 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2799 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2800 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2801 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2802 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2803 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2804 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2805 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2806 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2807 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2808 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2809 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2810 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2811 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2812 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2813 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2814};
2815
## List of microcode blocks (McBlock instances); starts out empty.
g_aoMcBlocks = list(); # type: list(McBlock)
2818
2819
2820
class ParserException(Exception):
    """
    Parser exception.

    Carries a single message string, which becomes the exception text.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2825
2826
2827class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2828 """
2829 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2830 """
2831
2832 ## @name Parser state.
2833 ## @{
2834 kiCode = 0;
2835 kiCommentMulti = 1;
2836 ## @}
2837
class Macro(object):
    """
    A preprocessor-style macro: name, optional parameter list and body text.

    A simple macro (asArgs is None or empty) expands to its body verbatim.
    For a macro with parameters, each whole-word occurrence of a parameter
    in the body is replaced by the matching invocation argument, and any
    '##' token-pasting operator (with surrounding blanks) next to the
    parameter is consumed by the replacement.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded macro body text.
        self.iLine  = iLine;    ##< Line number given by the creator (presumably the definition line - confirm against caller).
        # Matches one parameter name as a whole word, optionally glued to its
        # neighbours with '##'.  None for simple macros.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): '(' is never whitespace, so the second test is redundant
        #               as written; kept unchanged to preserve the expanded output.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is not used.  asArgs must hold exactly one value per macro
        parameter, and must be None/empty for a simple macro (both are
        checked with assertions).

        Returns the expanded body string.
        """
        _ = oParent;

        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        sBody = self.sBody;
        dArgs = dict(zip(self.asArgs, asArgs));

        def expandOne(oMatch):
            """ Produces the replacement text for a single parameter occurrence. """
            sValue = dArgs[oMatch.group(2)];
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';
            return sPre + sValue + sPost;

        # Substitute in a single pass over the *original* body.  Rescanning the
        # partially expanded text would evaluate \b against the characters of
        # an inserted value, so in 'a ## b' the second parameter ended up glued
        # to the first value and was left unexpanded.
        return self.oReArgMatch.sub(expandOne, sBody);
2879
2880
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used as prefix in error messages, asLines the
    lines to parse, and sDefaultMap a key into g_dInstructionMaps selecting
    the default instruction map (asserted to exist).  If oInheritMacrosFrom
    is given, its macro dictionary is copied and its macro-matching regular
    expression is shared as the starting point.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re      ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # All patterns are raw strings: the regex escapes (\s, \d, \[, \A, ...) are
    # not valid Python string escapes and trigger warnings in plain literals.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z');  ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');          ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');

    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Maps each '@op...' comment tag to the method that handles it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, in both the '@optestN' and '@optest[N]' spellings.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
2966
def raiseError(self, sMessage):
    """
    Raise error prefixed with the source and line number.

    Always raises ParserException; the text has the form
    '<source file>:<current line>: error: <message>'.
    """
    sFull = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFull);
2972
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.

    Always raises ParserException.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sFull = "%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,);
    raise ParserException(sFull);
2978
def error(self, sMessage):
    """
    Adds an error for the current line (self.iLine) to self.asErrors.

    Returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2986
def errorOnLine(self, iLine, sMessage):
    """
    Adds an error for the explicitly given line number to self.asErrors.

    Returns False, so callers can write 'return self.errorOnLine(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2994
def errorComment(self, iLineInComment, sMessage):
    """
    Adds a comment error to self.asErrors.

    The reported line number is self.iCommentLine + iLineInComment.
    Returns False, so callers can write 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3002
def printErrors(self):
    """
    Print the errors to stderr.

    All recorded messages are written in one go; nothing is written when
    there are none.

    Returns number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3011
def debug(self, sMessage):
    """
    For debugging.

    Writes 'debug: <message>' to stderr, but only when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3018
def stripComments(self, sLine):
    """
    Returns sLine with comments stripped.

    Every comment matched by self.oReComment is replaced by a single space.

    Complains if traces of incomplete multi-line comments are encountered.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    # Any comment delimiter left over means a comment spans several lines.
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3029
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table; the table body is read from
    self.asLines starting at self.iLine.

    Returns True when the whole table was consumed and has the expected
    number of entries, otherwise False after recording an error.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    oNameMatch = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine);
    if not oNameMatch:
        return self.error('Failed to extract the PFNIEMOP table name from: %s' % (sLine.strip(),));
    sName = oNameMatch.group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walks backwards so inserting and deleting does not disturb the
        # indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that already matches this
                    # opcode/prefix or still has them unset (filling them in).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None fits: register a copy of the first one for this slot.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3135
def addInstruction(self, iLine = None):
    """
    Adds an instruction.

    Creates an Instruction for self.sSrcFile at iLine (the current line when
    iLine is None), appends it to both g_aoAllInstructions and
    self.aoCurInstrs, and returns it.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3144
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Does nothing when oInstr already has a mnemonic.  Otherwise the first
    '_'-separated word of sStats, lower-cased, becomes the mnemonic.  The
    remaining words are taken as operand types, but only when the
    instruction has no operands yet.

    Returns False if an operand type is not in g_kdOpTypes, otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Unknown operand type: give up quietly, keeping what was added so far.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3160
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Fills in whatever is still missing on oInstr (mnemonic, disassembler
    enum, statistics name, function name, maps and encoding) from the values
    that are present, then adds the instruction to each of its maps and to
    the g_dAllInstructionsByStat and g_dAllInstructionsByFunction
    dictionaries.

    oInstr is the instruction to complete and iLine the line number that is
    recorded as its completion line.

    Always returns True.  A duplicate statistics name is reported through
    self.error() but does not stop the processing.
    """
    # An instruction must only be completed once.
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    # Note: Both branches below are placeholders and currently do nothing.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The stats name is preferred; the function name (sans 'iemOp_') is the fallback.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    # (The function name takes precedence when there is one.)
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    # (Falls back on the stats name when there is no mnemonic.)
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    # Note: When there are operands and the first one does not use ModR/M,
    #       the encoding is left as None here.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction with a given stats name wins; later ones are reported.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3266
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the per-instruction state.

    Each current instruction is passed to doneInstructionOne together with
    the completion line: self.iLine when iLineInComment is None, otherwise
    self.iCommentLine + iLineInComment.  The stub and instruction totals are
    updated, and the comment and current instruction list are cleared.

    When fEndOfFunction is set, the current function (if any) is completed
    and the function related state is reset as well.

    Always returns True.
    """
    for oCur in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCur, iDoneLine);
        if oCur.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];
    if not fEndOfFunction:
        return True;

    # End of function: hand the function its source lines and forget about it.
    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    if self.oCurFunction:
        iFirst = self.oCurFunction.iBeginLine - 1;
        self.oCurFunction.complete(self.iLine, self.asLines[iFirst : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3287
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib to oValue on all the current instructions.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced; any other existing value is left alone.
    """
    for oTarget in self.aoCurInstrs:
        if fOverwrite is True or getattr(oTarget, sAttrib) is None:
            setattr(oTarget, sAttrib, oValue);
3299
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib to oValue on all the
    current instructions.

    The array is padded with None entries as needed so that iEntry exists.
    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oTarget in self.aoCurInstrs:
        aoEntries = getattr(oTarget, sAttrib);
        cMissing  = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
3311
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line of asLines starts with the word 'Opcode' followed by
    a hex value, the words following 'Opcode' are recorded as the
    sRawOldOpcodes attribute of the current instructions (unless already
    set), with any '0X' prefix normalized to '0x'.

    Always returns False, also when a match was recorded.
    """
    asWords = asLines[0].split() if asLines else [];
    if      len(asWords) >= 2 \
        and asWords[0] == 'Opcode' \
        and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode bytes and
        # sub-opcode specifiers that follow it.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3327
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    An instruction is added at line self.iCommentLine + iTagLine when there
    is none.  The tag count of every current instruction is incremented, and
    self.cTotalTagged is bumped for each instruction getting its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCur in self.aoCurInstrs:
        cTagsBefore  = oCur.cOpTags;
        oCur.cOpTags = cTagsBefore + 1;
        if oCur.cOpTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3337
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section (a list of lines) becomes one string consisting
    of its stripped lines joined by single spaces.  Empty sections are
    dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines])
            for asLines in aasSections
            if asLines];
3349
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of each section are stripped and joined using sLineSep, and
    the resulting section strings are joined using sSectionSep.  Empty
    sections are skipped entirely, so they never produce a leading or
    doubled section separator.

    Returns the flattened string (empty if there are no non-empty sections).
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    # Joining only the non-empty sections avoids emitting a separator in
    # front of the first real section when it is preceded by empty ones.
    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if asLines]);
3367
3368
3369
3370 ## @name Tag parsers
3371 ## @{
3372
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be non-empty, at most 180 characters and consist of a
    single sentence; a terminating dot is appended if missing.  The brief
    can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot that is followed by a space, i.e. the end of
    # the first sentence.  Dots inside words (like 'rex.w') are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3402
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each section is flattened into a
    single string and appended to the description sections of the
    instruction.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);

    _ = sTag; _ = iEndLine;
    return True;
3417
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    The mnemonic can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        sMsg = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,);
        return self.errorComment(iTagLine, sMsg);

    # Refuse to replace an already specified mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        sMsg = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, sOldMnemonic, sNewMnemonic,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
3440
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  When
    omitted, the default location of the type (g_kdOpTypes[type][1]) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    # Note: This message used to say 'where' although it is the type that is being rejected.
    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    # (Operands not yet specified are padded with None.)
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3490
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key in g_dInstructionMaps.  Whitespace around
    the names is ignored.  Maps the instruction already is assigned to are
    rejected as duplicates.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    # Note: str.split(',') never returns an empty list, so the value must be
    #       checked before it is split up.
    if not sFlattened.strip():
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
3525
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value 'n/a' is stored as None.  Two character values get a '0x'
    prefix prepended before being validated.  The prefix can only be set
    once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # An empty value would otherwise trigger an IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3563
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted are values passing _isValidOpcodeByte() as well as '/<reg>',
    '11/<reg>' and '!11/<reg>', where <reg> is a single digit in the range
    0 thru 7.  The opcode can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if not _isValidOpcodeByte(sOpcode):
        # The ModR/M reg field is three bits wide, so only /0 thru /7 are valid.
        fValid = False;
        for sLead in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLead) + 1 \
               and sOpcode.startswith(sLead) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
        if not fValid:
            return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3593
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored is the first
    element of the corresponding entry.  The sub-opcode can only be set once
    per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sSubOpcode = self.flattenAllSections(aasSections);
    if sSubOpcode not in g_kdSubOpcodes:
        # List what is actually accepted rather than a hardcoded subset.
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sSubOpcode, ', '.join(sorted(g_kdSubOpcodes)),));
    sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];

    # Set it.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
3620
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or passes _isValidOpcodeByte().  The encoding can
    only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and reject it unless one of the three checks accepts it.
    sNewEncoding = self.flattenAllSections(aasSections);
    if      sNewEncoding not in g_kdEncodings \
        and sNewEncoding not in g_dInstructionMaps \
        and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an already specified encoding.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;

    _ = iEndLine;
    return True;
3647
## EFlags tag to Instruction attribute name.
# Used by parseTagOpEFlags to find the instruction attribute to check and
# assign for the tag being parsed.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3656
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a
    key in g_kdEFlagsMnemonics (surrounding whitespace is tolerated), or the
    single word 'none' for an empty list.  The list is stored in the
    instruction attribute self.kdOpFlagToAttr gives for the tag, which can
    only be set once per instruction.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for idxFlag, sRawFlag in enumerate(asFlags):
            if sRawFlag in g_kdEFlagsMnemonics:
                continue;
            sTrimmed = sRawFlag.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[idxFlag] = sTrimmed;
            else:
                # Keep going so every invalid flag gets reported.
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRawFlag,));
        if not fOkay:
            return False;

    # Set them, refusing to replace an earlier specification.
    sAttrib    = self.kdOpFlagToAttr[sTag];
    asPrevious = getattr(oInstr, sAttrib);
    if asPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
3688
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added to the dHints dictionary of the
    instruction; hints already present are reported as duplicates.

    Returns True on success and False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Update the list entry (strings are immutable, so the
                    # hint itself cannot be assigned into).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3722
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  If more than one word is given,
    an error is reported and the first word is used.  The enum can only be
    set once per instruction.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    cValues  = len(asValues);
    if cValues != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
        if cValues == 0:
            return False;

    sEnumName = asValues[0];
    if self.oReDisEnum.match(sEnumName) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sEnumName, self.oReDisEnum.pattern));

    # Set it, refusing to replace an earlier value.
    sOldEnum = oInstr.sDisEnum;
    if sOldEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldEnum, sEnumName,));
    oInstr.sDisEnum = sEnumName;

    _ = iEndLine;
    return True;
3751
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key in g_kdCpuNames.  If more than one word is
    given, an error is reported and the first word is used.  A second tag
    with a different CPU name is reported and otherwise ignored.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, validate it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # An empty value would otherwise trigger an IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not be done during the validation above,
    # as that would make it impossible to detect conflicting values here.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3781
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key in g_kdCpuIdFlags; the single word 'none' gives an empty list.
    Valid entries are appended to the asCpuIds list of the instruction;
    entries already present are reported as duplicates.

    Returns True on success and False if any entry is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Update the list entry (strings are immutable, so the
                    # value itself cannot be assigned into).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3815
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required, and the
    group can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    sNewGroup = asNames[0];
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Set it, refusing to replace an earlier value.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;

    _ = iEndLine;
    return True;
3841
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    Exactly one style that is a key in g_kdInvalidStyles is required, and
    only one of these tags may be given per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid style.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    sNewStyle = asWords[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Set it, refusing to replace an earlier value.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    if sTag == '@opinvalid':
        oInstr.fInvalid = True;
    elif sTag == '@opunused':
        oInstr.fUnused = True;

    _ = iEndLine;
    return True;
3879
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  Tests that parse
    without errors are appended to the aoTests list of the instruction;
    for the others the errors are reported via self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front, so we start out collecting
        # outputs, switch to inputs at '->' and to selectors at '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  These must be identity checks, as
            # the lists compare equal whenever they have the same content
            # (both being empty for instance).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found is considered.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional and defaults to that of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    # Only the first operator found is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4024
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is checked against the number of tests the instruction
    already has, and a mismatch is reported.  The tag is then handed to
    parseTagOpTest regardless.

    Returns whatever parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the test number, with or without the square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4040
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  The
    tag and its value are discarded without any processing.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4052
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references are appended to the asCopyTests list of the
    instruction; invalid and duplicate ones are reported and skipped.

    Returns True, or the result of self.errorComment() if no reference was
    given at all.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  The jobs
    # aren't executed here, which is why only the reference syntax is checked.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4081
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Restricts testing to the instructions carrying this tag, which is
    useful for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True on success, otherwise the errorComment result when the
    tag was given a value.
    """
    oTaggedInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag is a pure marker, so any text following it is a mistake.
    sStray = self.flattenAllSections(aasSections).strip();
    if sStray:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sStray));

    # Register the instruction once in the global only-test list.
    if oTaggedInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oTaggedInstr);

    _ = iEndLine;
    return True;
4104
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted, it must be a key of g_kdXcptTypes, and
    it may only be set once per instruction.

    Returns True on success, otherwise the errorComment result.
    """
    oTargetInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value is a whitespace separated list that must hold one entry.
    asGiven = self.flattenAllSections(aasSections).split();
    if len(asGiven) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asGiven,));
    (sNewType,) = asGiven;

    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes),));

    # Refuse to silently replace an earlier @opxcpttype.
    if oTargetInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oTargetInstr.sXcptType, sNewType,));
    oTargetInstr.sXcptType = sNewType;

    _ = iEndLine;
    return True;
4131
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally it is picked up from
    the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    Returns True on success, otherwise the errorComment result.
    """
    oTargetInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Only the first name sticks, later ones are rejected.
    sOldName = oTargetInstr.sFunction;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sOldName, sNewName,));
    oTargetInstr.sFunction = sNewName;

    _ = iEndLine;
    return True;
4159
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally it is picked up from the
    IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    Returns True on success, otherwise the errorComment result.
    """
    oTargetInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sNewStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewStats) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Only the first name sticks, later ones are rejected.
    sOldStats = oTargetInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sOldStats, sNewStats,));
    oTargetInstr.sStats = sNewStats;

    _ = iEndLine;
    return True;
4187
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions result, or the errorComment result when
    the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4200
4201 ## @}
4202
4203
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, decoration is stripped, and the text
    is grouped per tag: each tag owns a list of sections, where a section
    is a list of consecutive non-empty lines.  Text in front of the first
    tag belongs to the pseudo tag '@default'.  Tags found in
    self.dTagHandlers are dispatched to their handler; unknown tags
    starting with '@op' are reported as errors.

    Returns False if the comment holds nothing of interest, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Leading empty lines are dropped (advancing the comment line number),
    # as are trailing empty lines.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag begins, so process the previous one first.  A
            # trailing empty section is not part of the tag's value.
            #
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Start the new tag; text following it on the same line opens
            # its first section.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            asCurSection = [asSplit[1],] if len(asSplit) > 1 else [];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
            continue;

        # Ordinary text: extend the current section, or open a new one on
        # an empty line unless the current section is still empty.
        if sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.  (No "did you mean" hints here.)
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4304
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not within sInvocation, lines from self.asLines are appended,
    starting at index self.iLine, until it is found.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  An invalid macro name yields the
    self.error() result instead.
    """
    # First the name.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Then the arguments: split on commas at nesting depth one, ignoring
    # anything inside string and character literals.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    chOpenQuote = None;
    while cNesting > 0:
        # Pull in the next source line when we run out of text.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (offCur, asResult);
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;
        ch = sInvocation[offCur];

        if chOpenQuote:
            # Inside a literal: skip escaped characters, watch for the end.
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif ch == chOpenQuote:
                chOpenQuote = None;
        elif ch == '(':
            cNesting += 1;
        elif ch in ('"', '\'',):
            chOpenQuote = ch;
        elif ch in (',', ')',):
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
4355
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is directly followed (ignoring whitespace) by
    an opening parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Use lstrip().startswith() rather than indexing the stripped remainder,
    # as the latter raises IndexError when the name ends the code snippet.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4365
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4371
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    for which an invocation is found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no macro after the first hit is tried.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oCandidates if asRet is not None), None);
4381
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what is already recorded
    for the last current instruction, and they fill in the attributes
    (mnemonic, operands, encoding, stats name, hints) that are still unset.
    Problems are reported through self.error() and processing carries on.

    sMacro is the macro name (used in messages), sStats the a_Stats
    argument, sAsm the a_szMnemonic argument (unused), sForm the a_Form
    argument, sUpper and sLower the a_Upper and a_Lower mnemonic spellings,
    sDisHints and sIemHints the '|' separated hint expressions, and
    asOperands the list of a_OpN operand type names.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # A 'V' type operand requires a VEX or XOP form.  Use plain
                # membership tests here: str.find() returns -1 (truthy) on
                # a miss and 0 (falsy) for a hit at the very start, so its
                # result must never be used as a boolean.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those described by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4521
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a stats name nor an assembly string, so both
    are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the bare mnemonic serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4531
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function, makes it the current
    block and appends it to g_aoMcBlocks.  sCode is the code string the
    statement was found in, offBeginStatementInCodeStr its offset within
    sCode and offBeginStatementInLine its offset within the source line.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub)line holding the
    # statement, which matters for expanded multiline macros.  rfind yields
    # -1 when there is no preceding newline, making the subtrahend zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4560
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, trims anything that
    follows the 'IEM_MC_END();' statement, hands the lines to the block's
    complete() method and clears self.oCurMcBlock.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    oBlock = self.oCurMcBlock;
    if not oBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > oBlock.iBeginLine:
        asLines = self.asLines[oBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        sExpanded = self.asLines[self.iLine - 1];

        # Cut after the newline ending the IEM_MC_END line, if any.
        offNewline = sExpanded.find('\n', offEndStatementInLine);
        if offNewline > 0:
            sExpanded = sExpanded[:offNewline + 1];

        # Cut before the line holding IEM_MC_BEGIN, or right at the
        # statement if it is not the first thing on that line.
        offStart = sExpanded.rfind('\n', 0, oBlock.offBeginLine) + 1;
        sExpanded = sExpanded[offStart:];
        if not sExpanded.strip().startswith('IEM_MC_BEGIN'):
            sExpanded = sExpanded[oBlock.offBeginLine - offStart:]

        asLines = [sPart + '\n' for sPart in sExpanded.split('\n')];

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal      = asLines[-1];
    offFinalEnd = sFinal.find('IEM_MC_END');
    if offFinalEnd < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # whitespace.
    for chExpected, sComplaint in ( ('(', 'bogus IEM_MC_END: Expected "(" at %s: %s'),
                                    (')', 'bogus IEM_MC_END: Expected ")" at %s: %s'),
                                    (';', 'bogus IEM_MC_END: Expected ";" at %s: %s'), ):
        while sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if sFinal[offFinalEnd] != chExpected:
            self.raiseError(sComplaint % (offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    oBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
4632
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so asArgs is the argument list of such an invocation where the
    0th entry is the macro name and the 1st is used as the function name.

    Returns True.
    """
    # Complete any existing function; it ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Create the new function.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4648
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code snippet to scan and offLine the offset of sCode
    within the current source line (used for MC block positions).

    Returns True if a function definition macro or an IEM_MC_ statement
    was seen, False otherwise.
    """
    # Everything of interest here takes an argument list.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );

        fIsStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            # A stub has no body, so the instruction(s) are complete.
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Check for worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*: The L0 variants imply the vex_l_zero
    # hint.  Complain if the mnemonic macro didn't give it, then add it.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None and len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1 .. a_Op<N>,
        #                     a_fDisHints, a_fIemHints)
        # The N operands sit at index 6 and up, followed by the two hint arguments.
        for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                            'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX',)):
            asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[6 + cOperands], asArgs[7 + cOperands],
                                           asArgs[6 : 6 + cOperands]);

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, a_Op1 .. a_Op<N>, a_fDisHints, a_fIemHints)
        # The N operands sit at index 4 and up, followed by the two hint arguments.
        for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                            'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4',)):
            asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[4 + cOperands], asArgs[5 + cOperands],
                                         asArgs[4 : 4 + cOperands]);

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode >= 0:
        for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
            if oMatch.group(1) == 'END':
                self.workerIemMcEnd(offLine + oMatch.start());
            else:
                self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
        return True;

    return False;
4789
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The resulting expression matches any of the known macro names.  For
    macros taking arguments the match includes the opening parenthesis
    (and any whitespace in front of it), for simple macros the name must be
    followed by a word boundary.  With no macros, self.oReMacros is None.

    Returns True.
    """
    if self.dMacros:
        # Raw strings are used for the regex fragments, since '\s' and '\('
        # are invalid escape sequences in ordinary string literals.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
4810
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Only macros whose body contains IEM_MC_BEGIN are recorded in
    self.dMacros; everything else is accepted and ignored.

    Returns True for macros of no interest, the self.error() result for a
    definition that cannot be parsed, and otherwise the result of
    workerPreProcessRecreateMacroRegex.
    """
    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[0:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreProcessDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Use regex to split out the name, argument list and body.
    # If the function-like form doesn't match, we assume it's a simple macro.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    fFunctionLike = bool(oMatch);
    if not fFunctionLike:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));

    if fFunctionLike:
        asParams = [sParam.strip() for sParam in oMatch.group(2).split(',')];
        sBody    = oMatch.group(3);
    else:
        asParams = None;
        sBody    = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreProcessDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asParams, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    if "IEM_MC_BEGIN" not in sBody:
        #self.debug('workerPreProcessDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Add the macro.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
4860
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the
    result of workerPreProcessRecreateMacroRegex after dropping it.
    """
    # Quick comment strip (anything from the first slash on, unless the
    # slash comes first) and isolate the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we're tracking this macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4879
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive.

    Dispatches #define and #undef lines to their workers, passing on what
    follows the directive word with a newline appended.

    Returns the worker's result, or False if sLine is neither directive.
    """
    for oReDirective, fnWorker in ( (self.oReHashDefine, self.workerPreProcessDefine),
                                    (self.oReHashUndef,  self.workerPreProcessUndef), ):
        oMatch = oReDirective.match(sLine);
        if oMatch:
            return fnWorker(oMatch.group(1) + '\n');
    return False;
4892
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation that oMatch located in sLine.

    Only a single invocation per line is handled, namely the one oMatch
    points at.  For macros taking parameters the whole argument list must
    be on this line.

    Returns the line with the invocation replaced by the macro expansion.
    """
    #
    # Figure out which macro we are dealing with and how it is invoked.
    #
    offStart = oMatch.start();
    offEnd   = oMatch.end();
    sName    = oMatch.group(1);
    assert sName == sLine[offStart : offEnd];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro  = self.dMacros[sName] # type: SimpleParser.Macro
    sBefore = sLine[:offStart];

    #
    # Parameterless invocation: plain text substitution.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[offEnd:];

    #
    # Invocation with parameters.  Scan forward from just after the opening
    # parenthesis, splitting on top-level commas, until the matching closing
    # parenthesis is found.
    #
    asArgs      = [];
    cDepth      = 1;
    offArgStart = offEnd;
    offScan     = offEnd;
    while cDepth > 0:
        if offScan >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offScan];
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                asArgs.append(sLine[offArgStart:offScan].strip());
        elif ch == ',' and cDepth == 1:
            asArgs.append(sLine[offArgStart:offScan].strip());
            offArgStart = offScan + 1;
        offScan += 1;
    # offScan is now the offset of the first character after the closing parenthesis.

    # An empty parameter list shows up as a single empty argument.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Substitute the expansion for the invocation.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sBefore + oMacro.expandMacro(self, asArgs) + sLine[offScan:];
4951
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine.  For each line it first expands
    a known macro (only while in code state), then separates code from C-style
    comments: code fragments go to checkCodeForMacro(), completed multi-line
    comments go to parseComment().  Lines without any slash are checked for
    preprocessor directives, relevant IEM macros, a closing '}' in the first
    column, and instruction function tables.

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine is the 1-based number of sLine.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Store the expanded text back into the line array.
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the slow path unless the first slash starts a C++ comment while we are in code.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Short comment closed on this line: skip it and look for the next one.
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of comment: collect the tail, return to code state and parse it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (     self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions and report the statistics.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5050
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
# None until the first __parseFileByName() call, which parses the header and
# stores the resulting parser here for use when parsing the other files.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5053
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is located (next to
    sSrcFile, falling back to the directory of this script), parsed, and the
    parser is cached in g_oParsedCommonBodyMacros so its macros are available
    when parsing sSrcFile and all subsequent files.

    Returns a tuple: (the oParser.parse() result for sSrcFile, the parser).
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parser trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The header is only expected to contribute macros, so any
        # instruction, tag, stub or MC block found in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the labels in the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5115
5116
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and then in
    g_dAllInstructionsByFunction; the tests of every instruction found are
    appended to the requesting instruction.  References that cannot be found
    or that resolve to the requesting instruction itself are reported on
    stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        for sReference in (oTarget.asCopyTests or []):
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sReference, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            # Copy the tests, refusing self references.
            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
5144
5145
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, the aoTests of every other instruction is
    replaced by an empty list.  Does nothing when the list is empty.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5157
## List of all main instruction files and their default maps.
# Each entry is a (file name, default map name) pair.  parseAll() hands the
# whole table to the parser worker, while parseFiles() uses it to look up the
# default map by (case-insensitive) file name.
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h', 'one', ),
    ( 'IEMAllInstructionsOneByte.cpp.h', 'one', ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f', ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h', 'vexmap1', ),
    ( 'IEMAllInstructionsVexMap2.cpp.h', 'vexmap2', ),
    ( 'IEMAllInstructionsVexMap3.cpp.h', 'vexmap3', ),
    ( 'IEMAllInstructions3DNow.cpp.h', '3dnow', ),
);
5170
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (file name, default map name) pairs.
    A bare file name that does not exist relative to the current directory is
    looked up in the directory containing this script.  After parsing, the
    test copying and only-test filtering passes are applied and overall stub
    statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The divisor is clamped to one so that an empty
    # instruction list does not cause a division by zero.
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5201
5202
def parseFiles(asFiles):
    """
    Parses the given subset of the IEMAllInstruction*.cpp.h files.

    The default map of each file is found by comparing its base name, case
    insensitively, with the entries in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sKey = os.path.split(sFilename)[1].lower();
        # First table entry with a matching file name wins.
        sMap = next((asEntry[1] for asEntry in g_aasAllInstrFilesAndDefaultMap if asEntry[0].lower() == sKey), None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap);
5224
5225
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
5234
5235
5236#
5237# Generators (may perhaps move later).
5238#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats one instruction as a disassembler table entry line.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  The columns are, in order: the
    mnemonic/operand format string, one parser index per operand slot, the
    opcode enum value (oInstr.sDisEnum), one OP_PARM_xxx per operand slot and
    the DISOPTYPE_xxx flags derived from the instruction hints.

    Returns the formatted line (no trailing newline).
    Asserts on unknown encodings and on instructions lacking sDisEnum.
    """
    sMacro = 'OP';
    cMaxOperands = 3;
    if len(oInstr.aoOperands) > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
        assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    for iOperand, oOperand in enumerate(oInstr.aoOperands):
        # A space separates the mnemonic from the first operand, commas the rest.
        sTmp += ' ' if iOperand == 0 else ',';
        if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
            sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
        else:
            sTmp += g_kdOpTypes[oOperand.sType][2];
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    #
    # iStart marks where the parser index columns begin; the number of columns
    # added below determines which operands the 'remaining operands' loop covers.
    iStart = len(asColumns);
    if oInstr.sEncoding is None:
        pass;
    elif oInstr.sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding in [ 'prefix', ]:
        for oOperand in oInstr.aoOperands:
            asColumns.append('0,');
    elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif oInstr.sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif oInstr.sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif oInstr.sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands.
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        sIdx = g_kdOpTypes[oOperand.sType][0];
        #if sIdx != 'IDX_UseModRM':
        asColumns.append(sIdx + ',');
    # Pad the unused parser index slots with zeros.
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iStart = len(asColumns)
    for oOperand in oInstr.aoOperands:
        asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
    # Pad the unused operand slots.
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Flags.
    #
    # Only hints that map to DISOPTYPE_xxx defines are included, OR'ed
    # together in sorted hint order; '0' when there are none.
    sTmp = '';
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            if sTmp:
                sTmp += ' | ' + sDefine;
            else:
                sTmp += sDefine;
    if sTmp:
        sTmp += '),';
    else:
        sTmp += '0),';
    asColumns.append(sTmp);

    #
    # Format the columns into a line.
    #
    # Each column is padded out to its offset in aoffColumns; if the line is
    # already at or past that offset a single space is inserted instead.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for i, s in enumerate(asColumns):
        if len(sLine) < aoffColumns[i]:
            sLine += ' ' * (aoffColumns[i] - len(sLine));
        else:
            sLine += ' ';
        sLine += s;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5363
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether a table should be emitted in trimmed (short) form.

    Leading and trailing None entries can be trimmed off.  The short form is
    chosen when the number of trimmable entries is at least
    len(aoTableOrdered) // 30.

    NOTE(review): The original comment spoke of saving "more than 30%", which
    does not match the '// 30' test in the code - confirm which is intended.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to emit and cInstructions the index just past the last.
    For the full table this is (0, len(aoTableOrdered), False).
    """
    cTotal = len(aoTableOrdered);

    # Determine how much we can trim off at the end and at the start.
    iEnd   = next((i + 1 for i in range(cTotal - 1, -1, -1) if aoTableOrdered[i] is not None), 0);
    iFirst = next((i for i in range(iEnd) if aoTableOrdered[i] is not None), iEnd);

    cTrimmable = iFirst + (cTotal - iEnd);
    if cTrimmable < cTotal // 30:
        _ = oMap; # Use this for overriding.
        # Output the full table.
        return (0, cTotal, False);

    return (iFirst, iEnd, True);
5385
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files, then writes one DISOPCODE table plus a
    DISOPMAPDESC range record to oDstFile for each instruction map whose name
    starts with 'vex'.  Maps using the 'byte+pfx' selector are split into one
    table per prefix (none, 0x66, 0xf3, 0xf2) first.

    Returns exit code: 0 on success, 1 if parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;


    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by byte+prefix are split up into one map per prefix.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCur: oCur.sEncoding + ''.join(oCur.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Collected extern declarations; not written anywhere by this function.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a 16 opcode boundary gets its start index noted.
        if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            # Start of a new group of 16 opcodes: emit a separator comment.
            if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    # A whole aligned group of 16 opcodes is invalid.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip as
                        # many entries as the block macro covers (the final
                        # increment below accounts for the last one).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): an empty list is passed to the entry formatter, which expects
                    # an instruction object - confirm whether this case can actually occur.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # NOTE(review): for an empty range a dummy entry is emitted while the AssertCompile
        # below expects zero elements - confirm this combination is intended.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must name the same symbol as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
5514
# When run as a script, generate the disassembler tables on stdout and use the
# generator's return value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5517
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette